From 71e05eb84ac4cd941ecc8f773e837e36c2b3daaf Mon Sep 17 00:00:00 2001 From: Angus White Date: Tue, 12 Nov 2024 11:43:19 +1100 Subject: [PATCH 01/12] Compile new proto version --- protos/chunks.proto | 16 +- protos/chunks.v1.proto | 70 ++ protos/search.proto | 59 +- protos/search.v1.proto | 93 +++ sdks/node/src/grpc/chunks.ts | 201 +++--- sdks/node/src/grpc/google/protobuf/struct.ts | 99 +-- .../src/grpc/google/protobuf/timestamp.ts | 33 +- sdks/node/src/grpc/search.ts | 671 +++++++----------- sdks/python/src/redactive/grpc/v2/__init__.py | 327 +++++++++ 9 files changed, 947 insertions(+), 622 deletions(-) create mode 100644 protos/chunks.v1.proto create mode 100644 protos/search.v1.proto create mode 100644 sdks/python/src/redactive/grpc/v2/__init__.py diff --git a/protos/chunks.proto b/protos/chunks.proto index fead00f..f223946 100644 --- a/protos/chunks.proto +++ b/protos/chunks.proto @@ -1,9 +1,10 @@ syntax = "proto3"; -package redactive.grpc.v1; +package redactive.grpc.v2; import "google/protobuf/timestamp.proto"; + message ChunkMetadata { // Chunk content's creation timestamp optional google.protobuf.Timestamp created_at = 1; @@ -13,17 +14,18 @@ message ChunkMetadata { } message SourceReference { - // Source system of the document e.g. confluence, slack, google-drive + // Source system of the document e.g. confluence, slack, local_file_system string system = 1; // Version of the source system e.g. 1.0.0 string system_version = 2; - // Connection id to the source system e.g. confluence space id, slack channel id, google-drive drive id + // Connection id to the source system e.g. confluence space id, slack channel id, local file hostname string connection_id = 3; - // Document id in the source system e.g. confluence page id, slack message id, google-drive document id + // Document id in the source system e.g. confluence page id, slack message id, local file path string document_id = 4; - // Document version in the source system e.g. 
confluence page version, slack message version, google-drive document version + // Document version in the source system e.g. confluence page version, slack message version, local file version hash string document_version = 5; - // Document path in the source system e.g. "My Drive/document.txt", "slack-channel-name" + // Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide" + // or "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf" optional string document_path = 6; // Document name in the source system e.g. "document.txt" optional string document_name = 7; @@ -32,7 +34,7 @@ message SourceReference { message ChunkReference { // Chunking version e.g. 1.0.0 string chunking_version = 1; - // chunk id is unique within the document, but not globally unique. + // chunk id is unique within the document, but not globally unique, it's actually the index of the chunk in the document string chunk_id = 2; // SHA256 hash of the chunk body string chunk_hash = 3; diff --git a/protos/chunks.v1.proto b/protos/chunks.v1.proto new file mode 100644 index 0000000..fead00f --- /dev/null +++ b/protos/chunks.v1.proto @@ -0,0 +1,70 @@ +syntax = "proto3"; + +package redactive.grpc.v1; + +import "google/protobuf/timestamp.proto"; + +message ChunkMetadata { + // Chunk content's creation timestamp + optional google.protobuf.Timestamp created_at = 1; + // Chunk content's last modified timestamp + optional google.protobuf.Timestamp modified_at = 2; + optional string link = 3; +} + +message SourceReference { + // Source system of the document e.g. confluence, slack, google-drive + string system = 1; + // Version of the source system e.g. 1.0.0 + string system_version = 2; + // Connection id to the source system e.g. confluence space id, slack channel id, google-drive drive id + string connection_id = 3; + // Document id in the source system e.g. 
confluence page id, slack message id, google-drive document id + string document_id = 4; + // Document version in the source system e.g. confluence page version, slack message version, google-drive document version + string document_version = 5; + // Document path in the source system e.g. "My Drive/document.txt", "slack-channel-name" + optional string document_path = 6; + // Document name in the source system e.g. "document.txt" + optional string document_name = 7; +} + +message ChunkReference { + // Chunking version e.g. 1.0.0 + string chunking_version = 1; + // chunk id is unique within the document, but not globally unique. + string chunk_id = 2; + // SHA256 hash of the chunk body + string chunk_hash = 3; +} + +// A chunk is a part of a document +message RelevantChunk { + message Relevance { + // Similarity score of the chunk + float similarity_score = 1; + } + + // Source reference of the document + SourceReference source = 1; + // Chunk reference of the chunk + ChunkReference chunk = 2; + // Relevance of the chunk + Relevance relevance = 3; + // Chunk body + string chunk_body = 4; + // Document metadata + ChunkMetadata document_metadata = 5; +} + +// A chunk is a part of a document +message Chunk { + // Source reference of the document + SourceReference source = 1; + // Chunk reference of the chunk + ChunkReference chunk = 2; + // Chunk body + string chunk_body = 3; + // Document metadata + ChunkMetadata document_metadata = 4; +} diff --git a/protos/search.proto b/protos/search.proto index 92a853a..f980734 100644 --- a/protos/search.proto +++ b/protos/search.proto @@ -1,29 +1,23 @@ syntax = "proto3"; -package redactive.grpc.v1; +package redactive.grpc.v2; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; - import "chunks.proto"; service Search { // Query the index for relevant chunks - rpc QueryChunks(QueryRequest) returns (QueryResponse); + rpc SearchChunks(SearchChunksRequest) returns (SearchChunksResponse); // Query the index 
for all chunks of a specific document - rpc QueryChunksByDocumentName(QueryByDocumentNameRequest) returns (QueryByDocumentNameResponse); - // Get chunks by URL - rpc GetChunksByUrl(GetChunksByUrlRequest) returns (GetChunksByUrlResponse); + rpc GetDocument(GetDocumentRequest) returns (GetDocumentResponse); } message Query { - // Semantic query to execute - string semantic_query = 1; -} - -message DocumentNameQuery { - // Document name to search for - string document_name = 1; + // Search query for semantic content + optional string semantic_query = 1; + // Specific keywords to search for in source document + optional string keyword_query = 2; } message TimeSpan { @@ -32,7 +26,10 @@ message TimeSpan { } message Filters { - // Scope e.g. "confluence", "slack://channel-name", "google-drive://CompanyDrive/document.docx" + // Scope of the query. This may either be the name of a fetcher, or a subspace of documents. + // Subspaces take the form of :/// + // e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + // for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' repeated string scope = 1; // Timespan of response chunk's creation optional TimeSpan created = 2; @@ -44,7 +41,7 @@ message Filters { optional bool include_content_in_trash = 5; } -message QueryRequest { +message SearchChunksRequest { // How many results to try to return (maximum number of results) optional uint32 count = 1; // The query to execute @@ -53,7 +50,14 @@ message QueryRequest { optional Filters filters = 3; } -message QueryResponse { +message GetDocumentRequest { + // A reference to the document to retrieve + string ref = 1; + // Query filters (only really for GetDocByTitle) + optional Filters filters = 2; +} + +message SearchChunksResponse { // Query was successful bool success = 1; // Error message if query failed @@ -62,28 +66,7 @@ message QueryResponse { repeated RelevantChunk 
relevant_chunks = 3; } -message GetChunksByUrlRequest { - // URL to document - string url = 1; -} - -message GetChunksByUrlResponse { - // Fetch was successful - bool success = 1; - // Error message if fetch failed - optional google.protobuf.Struct error = 2; - // List of chunks - repeated Chunk chunks = 3; -} - -message QueryByDocumentNameRequest { - // The query to execute - DocumentNameQuery query = 2; - // Filters to apply to query - optional Filters filters = 3; -} - -message QueryByDocumentNameResponse { +message GetDocumentResponse { // Query was successful bool success = 1; // Error message if query failed diff --git a/protos/search.v1.proto b/protos/search.v1.proto new file mode 100644 index 0000000..92a853a --- /dev/null +++ b/protos/search.v1.proto @@ -0,0 +1,93 @@ +syntax = "proto3"; + +package redactive.grpc.v1; + +import "google/protobuf/struct.proto"; +import "google/protobuf/timestamp.proto"; + +import "chunks.proto"; + +service Search { + // Query the index for relevant chunks + rpc QueryChunks(QueryRequest) returns (QueryResponse); + // Query the index for all chunks of a specific document + rpc QueryChunksByDocumentName(QueryByDocumentNameRequest) returns (QueryByDocumentNameResponse); + // Get chunks by URL + rpc GetChunksByUrl(GetChunksByUrlRequest) returns (GetChunksByUrlResponse); +} + +message Query { + // Semantic query to execute + string semantic_query = 1; +} + +message DocumentNameQuery { + // Document name to search for + string document_name = 1; +} + +message TimeSpan { + optional google.protobuf.Timestamp after = 1; + optional google.protobuf.Timestamp before = 2; +} + +message Filters { + // Scope e.g. 
"confluence", "slack://channel-name", "google-drive://CompanyDrive/document.docx" + repeated string scope = 1; + // Timespan of response chunk's creation + optional TimeSpan created = 2; + // Timespan of response chunk's last modification + optional TimeSpan modified = 3; + // List of user emails associated with response chunk + repeated string user_emails = 4; + // Include content from documents in trash + optional bool include_content_in_trash = 5; +} + +message QueryRequest { + // How many results to try to return (maximum number of results) + optional uint32 count = 1; + // The query to execute + Query query = 2; + // Filters to apply to query + optional Filters filters = 3; +} + +message QueryResponse { + // Query was successful + bool success = 1; + // Error message if query failed + optional google.protobuf.Struct error = 2; + // List of relevant chunks + repeated RelevantChunk relevant_chunks = 3; +} + +message GetChunksByUrlRequest { + // URL to document + string url = 1; +} + +message GetChunksByUrlResponse { + // Fetch was successful + bool success = 1; + // Error message if fetch failed + optional google.protobuf.Struct error = 2; + // List of chunks + repeated Chunk chunks = 3; +} + +message QueryByDocumentNameRequest { + // The query to execute + DocumentNameQuery query = 2; + // Filters to apply to query + optional Filters filters = 3; +} + +message QueryByDocumentNameResponse { + // Query was successful + bool success = 1; + // Error message if query failed + optional google.protobuf.Struct error = 2; + // List of relevant chunks + repeated Chunk chunks = 3; +} diff --git a/sdks/node/src/grpc/chunks.ts b/sdks/node/src/grpc/chunks.ts index bb0dce5..8cb3628 100644 --- a/sdks/node/src/grpc/chunks.ts +++ b/sdks/node/src/grpc/chunks.ts @@ -1,37 +1,43 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. 
// versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: chunks.proto /* eslint-disable */ import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire"; - import { Timestamp } from "./google/protobuf/timestamp"; -export const protobufPackage = "redactive.grpc.v1"; +export const protobufPackage = "redactive.grpc.v2"; export interface ChunkMetadata { /** Chunk content's creation timestamp */ - createdAt?: Date | undefined; + createdAt?: + | Date + | undefined; /** Chunk content's last modified timestamp */ modifiedAt?: Date | undefined; link?: string | undefined; } export interface SourceReference { - /** Source system of the document e.g. confluence, slack, google-drive */ + /** Source system of the document e.g. confluence, slack, local_file_system */ system: string; /** Version of the source system e.g. 1.0.0 */ systemVersion: string; - /** Connection id to the source system e.g. confluence space id, slack channel id, google-drive drive id */ + /** Connection id to the source system e.g. confluence space id, slack channel id, local file hostname */ connectionId: string; - /** Document id in the source system e.g. confluence page id, slack message id, google-drive document id */ + /** Document id in the source system e.g. confluence page id, slack message id, local file path */ documentId: string; - /** Document version in the source system e.g. confluence page version, slack message version, google-drive document version */ + /** Document version in the source system e.g. confluence page version, slack message version, local file version hash */ documentVersion: string; - /** Document path in the source system e.g. "My Drive/document.txt", "slack-channel-name" */ - documentPath?: string | undefined; + /** + * Document path in the source system e.g. 
"redactiveai.atlassian.net/Engineering/Onboarding Guide" + * or "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf" + */ + documentPath?: + | string + | undefined; /** Document name in the source system e.g. "document.txt" */ documentName?: string | undefined; } @@ -39,7 +45,7 @@ export interface SourceReference { export interface ChunkReference { /** Chunking version e.g. 1.0.0 */ chunkingVersion: string; - /** chunk id is unique within the document, but not globally unique. */ + /** chunk id is unique within the document, but not globally unique, it's actually the index of the chunk in the document */ chunkId: string; /** SHA256 hash of the chunk body */ chunkHash: string; @@ -48,11 +54,17 @@ export interface ChunkReference { /** A chunk is a part of a document */ export interface RelevantChunk { /** Source reference of the document */ - source: SourceReference | undefined; + source: + | SourceReference + | undefined; /** Chunk reference of the chunk */ - chunk: ChunkReference | undefined; + chunk: + | ChunkReference + | undefined; /** Relevance of the chunk */ - relevance: RelevantChunk_Relevance | undefined; + relevance: + | RelevantChunk_Relevance + | undefined; /** Chunk body */ chunkBody: string; /** Document metadata */ @@ -67,9 +79,13 @@ export interface RelevantChunk_Relevance { /** A chunk is a part of a document */ export interface Chunk { /** Source reference of the document */ - source: SourceReference | undefined; + source: + | SourceReference + | undefined; /** Chunk reference of the chunk */ - chunk: ChunkReference | undefined; + chunk: + | ChunkReference + | undefined; /** Chunk body */ chunkBody: string; /** Document metadata */ @@ -101,27 +117,30 @@ export const ChunkMetadata: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.createdAt = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; - case 2: + } + 
case 2: { if (tag !== 18) { break; } message.modifiedAt = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.link = reader.string(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -135,7 +154,7 @@ export const ChunkMetadata: MessageFns = { return { createdAt: isSet(object.createdAt) ? fromJsonTimestamp(object.createdAt) : undefined, modifiedAt: isSet(object.modifiedAt) ? fromJsonTimestamp(object.modifiedAt) : undefined, - link: isSet(object.link) ? globalThis.String(object.link) : undefined + link: isSet(object.link) ? globalThis.String(object.link) : undefined, }; }, @@ -162,7 +181,7 @@ export const ChunkMetadata: MessageFns = { message.modifiedAt = object.modifiedAt ?? undefined; message.link = object.link ?? undefined; return message; - } + }, }; function createBaseSourceReference(): SourceReference { @@ -173,7 +192,7 @@ function createBaseSourceReference(): SourceReference { documentId: "", documentVersion: "", documentPath: undefined, - documentName: undefined + documentName: undefined, }; } @@ -210,55 +229,62 @@ export const SourceReference: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.system = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.systemVersion = reader.string(); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.connectionId = reader.string(); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.documentId = reader.string(); continue; - case 5: + } + case 5: { if (tag !== 42) { break; } message.documentVersion = reader.string(); continue; - case 6: + } + case 6: { if (tag !== 50) { break; } message.documentPath = reader.string(); continue; - case 7: + } + case 7: { if (tag !== 58) { break; } message.documentName = reader.string(); continue; + } } if ((tag & 7) === 4 || tag === 
0) { break; @@ -276,7 +302,7 @@ export const SourceReference: MessageFns = { documentId: isSet(object.documentId) ? globalThis.String(object.documentId) : "", documentVersion: isSet(object.documentVersion) ? globalThis.String(object.documentVersion) : "", documentPath: isSet(object.documentPath) ? globalThis.String(object.documentPath) : undefined, - documentName: isSet(object.documentName) ? globalThis.String(object.documentName) : undefined + documentName: isSet(object.documentName) ? globalThis.String(object.documentName) : undefined, }; }, @@ -319,7 +345,7 @@ export const SourceReference: MessageFns = { message.documentPath = object.documentPath ?? undefined; message.documentName = object.documentName ?? undefined; return message; - } + }, }; function createBaseChunkReference(): ChunkReference { @@ -347,27 +373,30 @@ export const ChunkReference: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.chunkingVersion = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.chunkId = reader.string(); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.chunkHash = reader.string(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -381,7 +410,7 @@ export const ChunkReference: MessageFns = { return { chunkingVersion: isSet(object.chunkingVersion) ? globalThis.String(object.chunkingVersion) : "", chunkId: isSet(object.chunkId) ? globalThis.String(object.chunkId) : "", - chunkHash: isSet(object.chunkHash) ? globalThis.String(object.chunkHash) : "" + chunkHash: isSet(object.chunkHash) ? globalThis.String(object.chunkHash) : "", }; }, @@ -408,7 +437,7 @@ export const ChunkReference: MessageFns = { message.chunkId = object.chunkId ?? ""; message.chunkHash = object.chunkHash ?? 
""; return message; - } + }, }; function createBaseRelevantChunk(): RelevantChunk { @@ -442,41 +471,46 @@ export const RelevantChunk: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.source = SourceReference.decode(reader, reader.uint32()); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.chunk = ChunkReference.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.relevance = RelevantChunk_Relevance.decode(reader, reader.uint32()); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.chunkBody = reader.string(); continue; - case 5: + } + case 5: { if (tag !== 42) { break; } message.documentMetadata = ChunkMetadata.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -492,7 +526,7 @@ export const RelevantChunk: MessageFns = { chunk: isSet(object.chunk) ? ChunkReference.fromJSON(object.chunk) : undefined, relevance: isSet(object.relevance) ? RelevantChunk_Relevance.fromJSON(object.relevance) : undefined, chunkBody: isSet(object.chunkBody) ? globalThis.String(object.chunkBody) : "", - documentMetadata: isSet(object.documentMetadata) ? ChunkMetadata.fromJSON(object.documentMetadata) : undefined + documentMetadata: isSet(object.documentMetadata) ? ChunkMetadata.fromJSON(object.documentMetadata) : undefined, }; }, @@ -521,21 +555,21 @@ export const RelevantChunk: MessageFns = { }, fromPartial, I>>(object: I): RelevantChunk { const message = createBaseRelevantChunk(); - message.source = - object.source !== undefined && object.source !== null ? SourceReference.fromPartial(object.source) : undefined; - message.chunk = - object.chunk !== undefined && object.chunk !== null ? ChunkReference.fromPartial(object.chunk) : undefined; - message.relevance = - object.relevance !== undefined && object.relevance !== null - ? 
RelevantChunk_Relevance.fromPartial(object.relevance) - : undefined; + message.source = (object.source !== undefined && object.source !== null) + ? SourceReference.fromPartial(object.source) + : undefined; + message.chunk = (object.chunk !== undefined && object.chunk !== null) + ? ChunkReference.fromPartial(object.chunk) + : undefined; + message.relevance = (object.relevance !== undefined && object.relevance !== null) + ? RelevantChunk_Relevance.fromPartial(object.relevance) + : undefined; message.chunkBody = object.chunkBody ?? ""; - message.documentMetadata = - object.documentMetadata !== undefined && object.documentMetadata !== null - ? ChunkMetadata.fromPartial(object.documentMetadata) - : undefined; + message.documentMetadata = (object.documentMetadata !== undefined && object.documentMetadata !== null) + ? ChunkMetadata.fromPartial(object.documentMetadata) + : undefined; return message; - } + }, }; function createBaseRelevantChunk_Relevance(): RelevantChunk_Relevance { @@ -557,13 +591,14 @@ export const RelevantChunk_Relevance: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 13) { break; } message.similarityScore = reader.float(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -592,7 +627,7 @@ export const RelevantChunk_Relevance: MessageFns = { const message = createBaseRelevantChunk_Relevance(); message.similarityScore = object.similarityScore ?? 
0; return message; - } + }, }; function createBaseChunk(): Chunk { @@ -623,34 +658,38 @@ export const Chunk: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.source = SourceReference.decode(reader, reader.uint32()); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.chunk = ChunkReference.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.chunkBody = reader.string(); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.documentMetadata = ChunkMetadata.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -665,7 +704,7 @@ export const Chunk: MessageFns = { source: isSet(object.source) ? SourceReference.fromJSON(object.source) : undefined, chunk: isSet(object.chunk) ? ChunkReference.fromJSON(object.chunk) : undefined, chunkBody: isSet(object.chunkBody) ? globalThis.String(object.chunkBody) : "", - documentMetadata: isSet(object.documentMetadata) ? ChunkMetadata.fromJSON(object.documentMetadata) : undefined + documentMetadata: isSet(object.documentMetadata) ? ChunkMetadata.fromJSON(object.documentMetadata) : undefined, }; }, @@ -691,34 +730,30 @@ export const Chunk: MessageFns = { }, fromPartial, I>>(object: I): Chunk { const message = createBaseChunk(); - message.source = - object.source !== undefined && object.source !== null ? SourceReference.fromPartial(object.source) : undefined; - message.chunk = - object.chunk !== undefined && object.chunk !== null ? ChunkReference.fromPartial(object.chunk) : undefined; + message.source = (object.source !== undefined && object.source !== null) + ? SourceReference.fromPartial(object.source) + : undefined; + message.chunk = (object.chunk !== undefined && object.chunk !== null) + ? ChunkReference.fromPartial(object.chunk) + : undefined; message.chunkBody = object.chunkBody ?? 
""; - message.documentMetadata = - object.documentMetadata !== undefined && object.documentMetadata !== null - ? ChunkMetadata.fromPartial(object.documentMetadata) - : undefined; + message.documentMetadata = (object.documentMetadata !== undefined && object.documentMetadata !== null) + ? ChunkMetadata.fromPartial(object.documentMetadata) + : undefined; return message; - } + }, }; type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin - ? T - : T extends globalThis.Array - ? globalThis.Array> - : T extends ReadonlyArray - ? ReadonlyArray> - : T extends {} - ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin ? T + : T extends globalThis.Array ? globalThis.Array> + : T extends ReadonlyArray ? ReadonlyArray> + : T extends {} ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin - ? P +export type Exact = P extends Builtin ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function toTimestamp(date: Date): Timestamp { diff --git a/sdks/node/src/grpc/google/protobuf/struct.ts b/sdks/node/src/grpc/google/protobuf/struct.ts index 426d4c5..25549bb 100644 --- a/sdks/node/src/grpc/google/protobuf/struct.ts +++ b/sdks/node/src/grpc/google/protobuf/struct.ts @@ -1,7 +1,7 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: google/protobuf/struct.proto /* eslint-disable */ @@ -13,12 +13,12 @@ export const protobufPackage = "google.protobuf"; * `NullValue` is a singleton enumeration to represent the null value for the * `Value` type union. * - * The JSON representation for `NullValue` is JSON `null`. + * The JSON representation for `NullValue` is JSON `null`. */ export enum NullValue { /** NULL_VALUE - Null value. 
*/ NULL_VALUE = 0, - UNRECOGNIZED = -1 + UNRECOGNIZED = -1, } export function nullValueFromJSON(object: any): NullValue { @@ -73,15 +73,25 @@ export interface Struct_FieldsEntry { */ export interface Value { /** Represents a null value. */ - nullValue?: NullValue | undefined; + nullValue?: + | NullValue + | undefined; /** Represents a double value. */ - numberValue?: number | undefined; + numberValue?: + | number + | undefined; /** Represents a string value. */ - stringValue?: string | undefined; + stringValue?: + | string + | undefined; /** Represents a boolean value. */ - boolValue?: boolean | undefined; + boolValue?: + | boolean + | undefined; /** Represents a structured value. */ - structValue?: { [key: string]: any } | undefined; + structValue?: + | { [key: string]: any } + | undefined; /** Represents a repeated `Value`. */ listValue?: Array | undefined; } @@ -117,7 +127,7 @@ export const Struct: MessageFns & StructWrapperFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } @@ -127,6 +137,7 @@ export const Struct: MessageFns & StructWrapperFns = { message.fields[entry1.key] = entry1.value; } continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -140,10 +151,10 @@ export const Struct: MessageFns & StructWrapperFns = { return { fields: isObject(object.fields) ? 
Object.entries(object.fields).reduce<{ [key: string]: any | undefined }>((acc, [key, value]) => { - acc[key] = value as any | undefined; - return acc; - }, {}) - : {} + acc[key] = value as any | undefined; + return acc; + }, {}) + : {}, }; }, @@ -173,7 +184,7 @@ export const Struct: MessageFns & StructWrapperFns = { } return acc; }, - {} + {}, ); return message; }, @@ -197,7 +208,7 @@ export const Struct: MessageFns & StructWrapperFns = { } } return object; - } + }, }; function createBaseStruct_FieldsEntry(): Struct_FieldsEntry { @@ -222,20 +233,22 @@ export const Struct_FieldsEntry: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.key = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.value = Value.unwrap(Value.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -248,7 +261,7 @@ export const Struct_FieldsEntry: MessageFns = { fromJSON(object: any): Struct_FieldsEntry { return { key: isSet(object.key) ? globalThis.String(object.key) : "", - value: isSet(object?.value) ? object.value : undefined + value: isSet(object?.value) ? object.value : undefined, }; }, @@ -271,7 +284,7 @@ export const Struct_FieldsEntry: MessageFns = { message.key = object.key ?? ""; message.value = object.value ?? 
undefined; return message; - } + }, }; function createBaseValue(): Value { @@ -281,7 +294,7 @@ function createBaseValue(): Value { stringValue: undefined, boolValue: undefined, structValue: undefined, - listValue: undefined + listValue: undefined, }; } @@ -315,48 +328,54 @@ export const Value: MessageFns & AnyValueWrapperFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.nullValue = reader.int32() as any; continue; - case 2: + } + case 2: { if (tag !== 17) { break; } message.numberValue = reader.double(); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.stringValue = reader.string(); continue; - case 4: + } + case 4: { if (tag !== 32) { break; } message.boolValue = reader.bool(); continue; - case 5: + } + case 5: { if (tag !== 42) { break; } message.structValue = Struct.unwrap(Struct.decode(reader, reader.uint32())); continue; - case 6: + } + case 6: { if (tag !== 50) { break; } message.listValue = ListValue.unwrap(ListValue.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -373,7 +392,7 @@ export const Value: MessageFns & AnyValueWrapperFns = { stringValue: isSet(object.stringValue) ? globalThis.String(object.stringValue) : undefined, boolValue: isSet(object.boolValue) ? globalThis.Boolean(object.boolValue) : undefined, structValue: isObject(object.structValue) ? object.structValue : undefined, - listValue: globalThis.Array.isArray(object.listValue) ? [...object.listValue] : undefined + listValue: globalThis.Array.isArray(object.listValue) ? 
[...object.listValue] : undefined, }; }, @@ -449,7 +468,7 @@ export const Value: MessageFns & AnyValueWrapperFns = { return null; } return undefined; - } + }, }; function createBaseListValue(): ListValue { @@ -471,13 +490,14 @@ export const ListValue: MessageFns & ListValueWrapperFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.values.push(Value.unwrap(Value.decode(reader, reader.uint32()))); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -520,24 +540,19 @@ export const ListValue: MessageFns & ListValueWrapperFns = { } else { return message as any; } - } + }, }; type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin - ? T - : T extends globalThis.Array - ? globalThis.Array> - : T extends ReadonlyArray - ? ReadonlyArray> - : T extends {} - ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin ? T + : T extends globalThis.Array ? globalThis.Array> + : T extends ReadonlyArray ? ReadonlyArray> + : T extends {} ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin - ? P +export type Exact = P extends Builtin ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function isObject(value: any): boolean { diff --git a/sdks/node/src/grpc/google/protobuf/timestamp.ts b/sdks/node/src/grpc/google/protobuf/timestamp.ts index cab0fd8..20a7698 100644 --- a/sdks/node/src/grpc/google/protobuf/timestamp.ts +++ b/sdks/node/src/grpc/google/protobuf/timestamp.ts @@ -1,7 +1,7 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. 
// versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: google/protobuf/timestamp.proto /* eslint-disable */ @@ -97,7 +97,7 @@ export const protobufPackage = "google.protobuf"; * [`strftime`](https://docs.python.org/2/library/time.html#time.strftime) with * the time format spec '%Y-%m-%dT%H:%M:%S.%fZ'. Likewise, in Java, one can use * the Joda Time's [`ISODateTimeFormat.dateTime()`]( - * http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateTime%2D%2D + * http://joda-time.sourceforge.net/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateTime() * ) to obtain a formatter capable of generating timestamps in this format. */ export interface Timestamp { @@ -138,20 +138,22 @@ export const Timestamp: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.seconds = longToNumber(reader.int64()); continue; - case 2: + } + case 2: { if (tag !== 16) { break; } message.nanos = reader.int32(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -164,7 +166,7 @@ export const Timestamp: MessageFns = { fromJSON(object: any): Timestamp { return { seconds: isSet(object.seconds) ? globalThis.Number(object.seconds) : 0, - nanos: isSet(object.nanos) ? globalThis.Number(object.nanos) : 0 + nanos: isSet(object.nanos) ? globalThis.Number(object.nanos) : 0, }; }, @@ -187,24 +189,19 @@ export const Timestamp: MessageFns = { message.seconds = object.seconds ?? 0; message.nanos = object.nanos ?? 0; return message; - } + }, }; type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin - ? T - : T extends globalThis.Array - ? globalThis.Array> - : T extends ReadonlyArray - ? ReadonlyArray> - : T extends {} - ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin ? 
T + : T extends globalThis.Array ? globalThis.Array> + : T extends ReadonlyArray ? ReadonlyArray> + : T extends {} ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin - ? P +export type Exact = P extends Builtin ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function longToNumber(int64: { toString(): string }): number { diff --git a/sdks/node/src/grpc/search.ts b/sdks/node/src/grpc/search.ts index 26617b9..026c452 100644 --- a/sdks/node/src/grpc/search.ts +++ b/sdks/node/src/grpc/search.ts @@ -1,38 +1,36 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: search.proto /* eslint-disable */ import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire"; import { + type CallOptions, ChannelCredentials, Client, - makeGenericClientConstructor, - Metadata, - type CallOptions, type ClientOptions, type ClientUnaryCall, type handleUnaryCall, + makeGenericClientConstructor, + Metadata, type ServiceError, - type UntypedServiceImplementation + type UntypedServiceImplementation, } from "@grpc/grpc-js"; - import { Chunk, RelevantChunk } from "./chunks"; import { Struct } from "./google/protobuf/struct"; import { Timestamp } from "./google/protobuf/timestamp"; -export const protobufPackage = "redactive.grpc.v1"; +export const protobufPackage = "redactive.grpc.v2"; export interface Query { - /** Semantic query to execute */ - semanticQuery: string; -} - -export interface DocumentNameQuery { - /** Document name to search for */ - documentName: string; + /** Search query for semantic content */ + semanticQuery?: + | string + | undefined; + /** Specific keywords to search for in source document */ + keywordQuery?: string | undefined; } export interface TimeSpan { @@ -41,75 +39,81 @@ export interface TimeSpan { } export interface Filters { - /** 
Scope e.g. "confluence", "slack://channel-name", "google-drive://CompanyDrive/document.docx" */ + /** + * Scope of the query. This may either be the name of a fetcher, or a subspace of documents. + * Subspaces take the form of :/// + * e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + * for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' + */ scope: string[]; /** Timespan of response chunk's creation */ - created?: TimeSpan | undefined; + created?: + | TimeSpan + | undefined; /** Timespan of response chunk's last modification */ - modified?: TimeSpan | undefined; + modified?: + | TimeSpan + | undefined; /** List of user emails associated with response chunk */ userEmails: string[]; /** Include content from documents in trash */ includeContentInTrash?: boolean | undefined; } -export interface QueryRequest { +export interface SearchChunksRequest { /** How many results to try to return (maximum number of results) */ - count?: number | undefined; + count?: + | number + | undefined; /** The query to execute */ - query: Query | undefined; + query: + | Query + | undefined; /** Filters to apply to query */ filters?: Filters | undefined; } -export interface QueryResponse { +export interface GetDocumentRequest { + /** A reference to the document to retrieve */ + ref: string; + /** Query filters (only really for GetDocByTitle) */ + filters?: Filters | undefined; +} + +export interface SearchChunksResponse { /** Query was successful */ success: boolean; /** Error message if query failed */ - error?: { [key: string]: any } | undefined; + error?: + | { [key: string]: any } + | undefined; /** List of relevant chunks */ relevantChunks: RelevantChunk[]; } -export interface GetChunksByUrlRequest { - /** URL to document */ - url: string; -} - -export interface GetChunksByUrlResponse { - /** Fetch was successful */ - success: boolean; - /** Error message if fetch failed */ - 
error?: { [key: string]: any } | undefined; - /** List of chunks */ - chunks: Chunk[]; -} - -export interface QueryByDocumentNameRequest { - /** The query to execute */ - query: DocumentNameQuery | undefined; - /** Filters to apply to query */ - filters?: Filters | undefined; -} - -export interface QueryByDocumentNameResponse { +export interface GetDocumentResponse { /** Query was successful */ success: boolean; /** Error message if query failed */ - error?: { [key: string]: any } | undefined; + error?: + | { [key: string]: any } + | undefined; /** List of relevant chunks */ chunks: Chunk[]; } function createBaseQuery(): Query { - return { semanticQuery: "" }; + return { semanticQuery: undefined, keywordQuery: undefined }; } export const Query: MessageFns = { encode(message: Query, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.semanticQuery !== "") { + if (message.semanticQuery !== undefined) { writer.uint32(10).string(message.semanticQuery); } + if (message.keywordQuery !== undefined) { + writer.uint32(18).string(message.keywordQuery); + } return writer; }, @@ -120,13 +124,22 @@ export const Query: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.semanticQuery = reader.string(); continue; + } + case 2: { + if (tag !== 18) { + break; + } + + message.keywordQuery = reader.string(); + continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -137,14 +150,20 @@ export const Query: MessageFns = { }, fromJSON(object: any): Query { - return { semanticQuery: isSet(object.semanticQuery) ? globalThis.String(object.semanticQuery) : "" }; + return { + semanticQuery: isSet(object.semanticQuery) ? globalThis.String(object.semanticQuery) : undefined, + keywordQuery: isSet(object.keywordQuery) ? 
globalThis.String(object.keywordQuery) : undefined, + }; }, toJSON(message: Query): unknown { const obj: any = {}; - if (message.semanticQuery !== "") { + if (message.semanticQuery !== undefined) { obj.semanticQuery = message.semanticQuery; } + if (message.keywordQuery !== undefined) { + obj.keywordQuery = message.keywordQuery; + } return obj; }, @@ -153,66 +172,10 @@ export const Query: MessageFns = { }, fromPartial, I>>(object: I): Query { const message = createBaseQuery(); - message.semanticQuery = object.semanticQuery ?? ""; - return message; - } -}; - -function createBaseDocumentNameQuery(): DocumentNameQuery { - return { documentName: "" }; -} - -export const DocumentNameQuery: MessageFns = { - encode(message: DocumentNameQuery, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.documentName !== "") { - writer.uint32(10).string(message.documentName); - } - return writer; - }, - - decode(input: BinaryReader | Uint8Array, length?: number): DocumentNameQuery { - const reader = input instanceof BinaryReader ? input : new BinaryReader(input); - let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseDocumentNameQuery(); - while (reader.pos < end) { - const tag = reader.uint32(); - switch (tag >>> 3) { - case 1: - if (tag !== 10) { - break; - } - - message.documentName = reader.string(); - continue; - } - if ((tag & 7) === 4 || tag === 0) { - break; - } - reader.skip(tag & 7); - } + message.semanticQuery = object.semanticQuery ?? undefined; + message.keywordQuery = object.keywordQuery ?? undefined; return message; }, - - fromJSON(object: any): DocumentNameQuery { - return { documentName: isSet(object.documentName) ? 
globalThis.String(object.documentName) : "" }; - }, - - toJSON(message: DocumentNameQuery): unknown { - const obj: any = {}; - if (message.documentName !== "") { - obj.documentName = message.documentName; - } - return obj; - }, - - create, I>>(base?: I): DocumentNameQuery { - return DocumentNameQuery.fromPartial(base ?? ({} as any)); - }, - fromPartial, I>>(object: I): DocumentNameQuery { - const message = createBaseDocumentNameQuery(); - message.documentName = object.documentName ?? ""; - return message; - } }; function createBaseTimeSpan(): TimeSpan { @@ -237,20 +200,22 @@ export const TimeSpan: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.after = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.before = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -263,7 +228,7 @@ export const TimeSpan: MessageFns = { fromJSON(object: any): TimeSpan { return { after: isSet(object.after) ? fromJsonTimestamp(object.after) : undefined, - before: isSet(object.before) ? fromJsonTimestamp(object.before) : undefined + before: isSet(object.before) ? fromJsonTimestamp(object.before) : undefined, }; }, @@ -286,7 +251,7 @@ export const TimeSpan: MessageFns = { message.after = object.after ?? undefined; message.before = object.before ?? 
undefined; return message; - } + }, }; function createBaseFilters(): Filters { @@ -320,41 +285,46 @@ export const Filters: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.scope.push(reader.string()); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.created = TimeSpan.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.modified = TimeSpan.decode(reader, reader.uint32()); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.userEmails.push(reader.string()); continue; - case 5: + } + case 5: { if (tag !== 40) { break; } message.includeContentInTrash = reader.bool(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -374,7 +344,7 @@ export const Filters: MessageFns = { : [], includeContentInTrash: isSet(object.includeContentInTrash) ? globalThis.Boolean(object.includeContentInTrash) - : undefined + : undefined, }; }, @@ -404,22 +374,24 @@ export const Filters: MessageFns = { fromPartial, I>>(object: I): Filters { const message = createBaseFilters(); message.scope = object.scope?.map((e) => e) || []; - message.created = - object.created !== undefined && object.created !== null ? TimeSpan.fromPartial(object.created) : undefined; - message.modified = - object.modified !== undefined && object.modified !== null ? TimeSpan.fromPartial(object.modified) : undefined; + message.created = (object.created !== undefined && object.created !== null) + ? TimeSpan.fromPartial(object.created) + : undefined; + message.modified = (object.modified !== undefined && object.modified !== null) + ? TimeSpan.fromPartial(object.modified) + : undefined; message.userEmails = object.userEmails?.map((e) => e) || []; message.includeContentInTrash = object.includeContentInTrash ?? 
undefined; return message; - } + }, }; -function createBaseQueryRequest(): QueryRequest { +function createBaseSearchChunksRequest(): SearchChunksRequest { return { count: undefined, query: undefined, filters: undefined }; } -export const QueryRequest: MessageFns = { - encode(message: QueryRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { +export const SearchChunksRequest: MessageFns = { + encode(message: SearchChunksRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { if (message.count !== undefined) { writer.uint32(8).uint32(message.count); } @@ -432,34 +404,37 @@ export const QueryRequest: MessageFns = { return writer; }, - decode(input: BinaryReader | Uint8Array, length?: number): QueryRequest { + decode(input: BinaryReader | Uint8Array, length?: number): SearchChunksRequest { const reader = input instanceof BinaryReader ? input : new BinaryReader(input); let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseQueryRequest(); + const message = createBaseSearchChunksRequest(); while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.count = reader.uint32(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.query = Query.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.filters = Filters.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -469,15 +444,15 @@ export const QueryRequest: MessageFns = { return message; }, - fromJSON(object: any): QueryRequest { + fromJSON(object: any): SearchChunksRequest { return { count: isSet(object.count) ? globalThis.Number(object.count) : undefined, query: isSet(object.query) ? Query.fromJSON(object.query) : undefined, - filters: isSet(object.filters) ? Filters.fromJSON(object.filters) : undefined + filters: isSet(object.filters) ? 
Filters.fromJSON(object.filters) : undefined, }; }, - toJSON(message: QueryRequest): unknown { + toJSON(message: SearchChunksRequest): unknown { const obj: any = {}; if (message.count !== undefined) { obj.count = Math.round(message.count); @@ -491,65 +466,58 @@ export const QueryRequest: MessageFns = { return obj; }, - create, I>>(base?: I): QueryRequest { - return QueryRequest.fromPartial(base ?? ({} as any)); + create, I>>(base?: I): SearchChunksRequest { + return SearchChunksRequest.fromPartial(base ?? ({} as any)); }, - fromPartial, I>>(object: I): QueryRequest { - const message = createBaseQueryRequest(); + fromPartial, I>>(object: I): SearchChunksRequest { + const message = createBaseSearchChunksRequest(); message.count = object.count ?? undefined; - message.query = object.query !== undefined && object.query !== null ? Query.fromPartial(object.query) : undefined; - message.filters = - object.filters !== undefined && object.filters !== null ? Filters.fromPartial(object.filters) : undefined; + message.query = (object.query !== undefined && object.query !== null) ? Query.fromPartial(object.query) : undefined; + message.filters = (object.filters !== undefined && object.filters !== null) + ? 
Filters.fromPartial(object.filters) + : undefined; return message; - } + }, }; -function createBaseQueryResponse(): QueryResponse { - return { success: false, error: undefined, relevantChunks: [] }; +function createBaseGetDocumentRequest(): GetDocumentRequest { + return { ref: "", filters: undefined }; } -export const QueryResponse: MessageFns = { - encode(message: QueryResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.success !== false) { - writer.uint32(8).bool(message.success); - } - if (message.error !== undefined) { - Struct.encode(Struct.wrap(message.error), writer.uint32(18).fork()).join(); +export const GetDocumentRequest: MessageFns = { + encode(message: GetDocumentRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { + if (message.ref !== "") { + writer.uint32(10).string(message.ref); } - for (const v of message.relevantChunks) { - RelevantChunk.encode(v!, writer.uint32(26).fork()).join(); + if (message.filters !== undefined) { + Filters.encode(message.filters, writer.uint32(18).fork()).join(); } return writer; }, - decode(input: BinaryReader | Uint8Array, length?: number): QueryResponse { + decode(input: BinaryReader | Uint8Array, length?: number): GetDocumentRequest { const reader = input instanceof BinaryReader ? input : new BinaryReader(input); let end = length === undefined ? 
reader.len : reader.pos + length; - const message = createBaseQueryResponse(); + const message = createBaseGetDocumentRequest(); while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: - if (tag !== 8) { + case 1: { + if (tag !== 10) { break; } - message.success = reader.bool(); + message.ref = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } - message.error = Struct.unwrap(Struct.decode(reader, reader.uint32())); - continue; - case 3: - if (tag !== 26) { - break; - } - - message.relevantChunks.push(RelevantChunk.decode(reader, reader.uint32())); + message.filters = Filters.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -559,145 +527,86 @@ export const QueryResponse: MessageFns = { return message; }, - fromJSON(object: any): QueryResponse { + fromJSON(object: any): GetDocumentRequest { return { - success: isSet(object.success) ? globalThis.Boolean(object.success) : false, - error: isObject(object.error) ? object.error : undefined, - relevantChunks: globalThis.Array.isArray(object?.relevantChunks) - ? object.relevantChunks.map((e: any) => RelevantChunk.fromJSON(e)) - : [] + ref: isSet(object.ref) ? globalThis.String(object.ref) : "", + filters: isSet(object.filters) ? Filters.fromJSON(object.filters) : undefined, }; }, - toJSON(message: QueryResponse): unknown { + toJSON(message: GetDocumentRequest): unknown { const obj: any = {}; - if (message.success !== false) { - obj.success = message.success; + if (message.ref !== "") { + obj.ref = message.ref; } - if (message.error !== undefined) { - obj.error = message.error; - } - if (message.relevantChunks?.length) { - obj.relevantChunks = message.relevantChunks.map((e) => RelevantChunk.toJSON(e)); + if (message.filters !== undefined) { + obj.filters = Filters.toJSON(message.filters); } return obj; }, - create, I>>(base?: I): QueryResponse { - return QueryResponse.fromPartial(base ?? 
({} as any)); + create, I>>(base?: I): GetDocumentRequest { + return GetDocumentRequest.fromPartial(base ?? ({} as any)); }, - fromPartial, I>>(object: I): QueryResponse { - const message = createBaseQueryResponse(); - message.success = object.success ?? false; - message.error = object.error ?? undefined; - message.relevantChunks = object.relevantChunks?.map((e) => RelevantChunk.fromPartial(e)) || []; + fromPartial, I>>(object: I): GetDocumentRequest { + const message = createBaseGetDocumentRequest(); + message.ref = object.ref ?? ""; + message.filters = (object.filters !== undefined && object.filters !== null) + ? Filters.fromPartial(object.filters) + : undefined; return message; - } -}; - -function createBaseGetChunksByUrlRequest(): GetChunksByUrlRequest { - return { url: "" }; -} - -export const GetChunksByUrlRequest: MessageFns = { - encode(message: GetChunksByUrlRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.url !== "") { - writer.uint32(10).string(message.url); - } - return writer; - }, - - decode(input: BinaryReader | Uint8Array, length?: number): GetChunksByUrlRequest { - const reader = input instanceof BinaryReader ? input : new BinaryReader(input); - let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseGetChunksByUrlRequest(); - while (reader.pos < end) { - const tag = reader.uint32(); - switch (tag >>> 3) { - case 1: - if (tag !== 10) { - break; - } - - message.url = reader.string(); - continue; - } - if ((tag & 7) === 4 || tag === 0) { - break; - } - reader.skip(tag & 7); - } - return message; - }, - - fromJSON(object: any): GetChunksByUrlRequest { - return { url: isSet(object.url) ? globalThis.String(object.url) : "" }; - }, - - toJSON(message: GetChunksByUrlRequest): unknown { - const obj: any = {}; - if (message.url !== "") { - obj.url = message.url; - } - return obj; }, - - create, I>>(base?: I): GetChunksByUrlRequest { - return GetChunksByUrlRequest.fromPartial(base ?? 
({} as any)); - }, - fromPartial, I>>(object: I): GetChunksByUrlRequest { - const message = createBaseGetChunksByUrlRequest(); - message.url = object.url ?? ""; - return message; - } }; -function createBaseGetChunksByUrlResponse(): GetChunksByUrlResponse { - return { success: false, error: undefined, chunks: [] }; +function createBaseSearchChunksResponse(): SearchChunksResponse { + return { success: false, error: undefined, relevantChunks: [] }; } -export const GetChunksByUrlResponse: MessageFns = { - encode(message: GetChunksByUrlResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { +export const SearchChunksResponse: MessageFns = { + encode(message: SearchChunksResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { if (message.success !== false) { writer.uint32(8).bool(message.success); } if (message.error !== undefined) { Struct.encode(Struct.wrap(message.error), writer.uint32(18).fork()).join(); } - for (const v of message.chunks) { - Chunk.encode(v!, writer.uint32(26).fork()).join(); + for (const v of message.relevantChunks) { + RelevantChunk.encode(v!, writer.uint32(26).fork()).join(); } return writer; }, - decode(input: BinaryReader | Uint8Array, length?: number): GetChunksByUrlResponse { + decode(input: BinaryReader | Uint8Array, length?: number): SearchChunksResponse { const reader = input instanceof BinaryReader ? input : new BinaryReader(input); let end = length === undefined ? 
reader.len : reader.pos + length; - const message = createBaseGetChunksByUrlResponse(); + const message = createBaseSearchChunksResponse(); while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.success = reader.bool(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.error = Struct.unwrap(Struct.decode(reader, reader.uint32())); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } - message.chunks.push(Chunk.decode(reader, reader.uint32())); + message.relevantChunks.push(RelevantChunk.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -707,15 +616,17 @@ export const GetChunksByUrlResponse: MessageFns = { return message; }, - fromJSON(object: any): GetChunksByUrlResponse { + fromJSON(object: any): SearchChunksResponse { return { success: isSet(object.success) ? globalThis.Boolean(object.success) : false, error: isObject(object.error) ? object.error : undefined, - chunks: globalThis.Array.isArray(object?.chunks) ? object.chunks.map((e: any) => Chunk.fromJSON(e)) : [] + relevantChunks: globalThis.Array.isArray(object?.relevantChunks) + ? object.relevantChunks.map((e: any) => RelevantChunk.fromJSON(e)) + : [], }; }, - toJSON(message: GetChunksByUrlResponse): unknown { + toJSON(message: SearchChunksResponse): unknown { const obj: any = {}; if (message.success !== false) { obj.success = message.success; @@ -723,106 +634,30 @@ export const GetChunksByUrlResponse: MessageFns = { if (message.error !== undefined) { obj.error = message.error; } - if (message.chunks?.length) { - obj.chunks = message.chunks.map((e) => Chunk.toJSON(e)); + if (message.relevantChunks?.length) { + obj.relevantChunks = message.relevantChunks.map((e) => RelevantChunk.toJSON(e)); } return obj; }, - create, I>>(base?: I): GetChunksByUrlResponse { - return GetChunksByUrlResponse.fromPartial(base ?? 
({} as any)); + create, I>>(base?: I): SearchChunksResponse { + return SearchChunksResponse.fromPartial(base ?? ({} as any)); }, - fromPartial, I>>(object: I): GetChunksByUrlResponse { - const message = createBaseGetChunksByUrlResponse(); + fromPartial, I>>(object: I): SearchChunksResponse { + const message = createBaseSearchChunksResponse(); message.success = object.success ?? false; message.error = object.error ?? undefined; - message.chunks = object.chunks?.map((e) => Chunk.fromPartial(e)) || []; - return message; - } -}; - -function createBaseQueryByDocumentNameRequest(): QueryByDocumentNameRequest { - return { query: undefined, filters: undefined }; -} - -export const QueryByDocumentNameRequest: MessageFns = { - encode(message: QueryByDocumentNameRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.query !== undefined) { - DocumentNameQuery.encode(message.query, writer.uint32(18).fork()).join(); - } - if (message.filters !== undefined) { - Filters.encode(message.filters, writer.uint32(26).fork()).join(); - } - return writer; - }, - - decode(input: BinaryReader | Uint8Array, length?: number): QueryByDocumentNameRequest { - const reader = input instanceof BinaryReader ? input : new BinaryReader(input); - let end = length === undefined ? 
reader.len : reader.pos + length; - const message = createBaseQueryByDocumentNameRequest(); - while (reader.pos < end) { - const tag = reader.uint32(); - switch (tag >>> 3) { - case 2: - if (tag !== 18) { - break; - } - - message.query = DocumentNameQuery.decode(reader, reader.uint32()); - continue; - case 3: - if (tag !== 26) { - break; - } - - message.filters = Filters.decode(reader, reader.uint32()); - continue; - } - if ((tag & 7) === 4 || tag === 0) { - break; - } - reader.skip(tag & 7); - } + message.relevantChunks = object.relevantChunks?.map((e) => RelevantChunk.fromPartial(e)) || []; return message; }, - - fromJSON(object: any): QueryByDocumentNameRequest { - return { - query: isSet(object.query) ? DocumentNameQuery.fromJSON(object.query) : undefined, - filters: isSet(object.filters) ? Filters.fromJSON(object.filters) : undefined - }; - }, - - toJSON(message: QueryByDocumentNameRequest): unknown { - const obj: any = {}; - if (message.query !== undefined) { - obj.query = DocumentNameQuery.toJSON(message.query); - } - if (message.filters !== undefined) { - obj.filters = Filters.toJSON(message.filters); - } - return obj; - }, - - create, I>>(base?: I): QueryByDocumentNameRequest { - return QueryByDocumentNameRequest.fromPartial(base ?? ({} as any)); - }, - fromPartial, I>>(object: I): QueryByDocumentNameRequest { - const message = createBaseQueryByDocumentNameRequest(); - message.query = - object.query !== undefined && object.query !== null ? DocumentNameQuery.fromPartial(object.query) : undefined; - message.filters = - object.filters !== undefined && object.filters !== null ? 
Filters.fromPartial(object.filters) : undefined; - return message; - } }; -function createBaseQueryByDocumentNameResponse(): QueryByDocumentNameResponse { +function createBaseGetDocumentResponse(): GetDocumentResponse { return { success: false, error: undefined, chunks: [] }; } -export const QueryByDocumentNameResponse: MessageFns = { - encode(message: QueryByDocumentNameResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { +export const GetDocumentResponse: MessageFns = { + encode(message: GetDocumentResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { if (message.success !== false) { writer.uint32(8).bool(message.success); } @@ -835,34 +670,37 @@ export const QueryByDocumentNameResponse: MessageFns>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.success = reader.bool(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.error = Struct.unwrap(Struct.decode(reader, reader.uint32())); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.chunks.push(Chunk.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -872,15 +710,15 @@ export const QueryByDocumentNameResponse: MessageFns Chunk.fromJSON(e)) : [] + chunks: globalThis.Array.isArray(object?.chunks) ? object.chunks.map((e: any) => Chunk.fromJSON(e)) : [], }; }, - toJSON(message: QueryByDocumentNameResponse): unknown { + toJSON(message: GetDocumentResponse): unknown { const obj: any = {}; if (message.success !== false) { obj.success = message.success; @@ -894,115 +732,85 @@ export const QueryByDocumentNameResponse: MessageFns, I>>(base?: I): QueryByDocumentNameResponse { - return QueryByDocumentNameResponse.fromPartial(base ?? ({} as any)); + create, I>>(base?: I): GetDocumentResponse { + return GetDocumentResponse.fromPartial(base ?? 
({} as any)); }, - fromPartial, I>>(object: I): QueryByDocumentNameResponse { - const message = createBaseQueryByDocumentNameResponse(); + fromPartial, I>>(object: I): GetDocumentResponse { + const message = createBaseGetDocumentResponse(); message.success = object.success ?? false; message.error = object.error ?? undefined; message.chunks = object.chunks?.map((e) => Chunk.fromPartial(e)) || []; return message; - } + }, }; export type SearchService = typeof SearchService; export const SearchService = { /** Query the index for relevant chunks */ - queryChunks: { - path: "/redactive.grpc.v1.Search/QueryChunks", + searchChunks: { + path: "/redactive.grpc.v2.Search/SearchChunks", requestStream: false, responseStream: false, - requestSerialize: (value: QueryRequest) => Buffer.from(QueryRequest.encode(value).finish()), - requestDeserialize: (value: Buffer) => QueryRequest.decode(value), - responseSerialize: (value: QueryResponse) => Buffer.from(QueryResponse.encode(value).finish()), - responseDeserialize: (value: Buffer) => QueryResponse.decode(value) + requestSerialize: (value: SearchChunksRequest) => Buffer.from(SearchChunksRequest.encode(value).finish()), + requestDeserialize: (value: Buffer) => SearchChunksRequest.decode(value), + responseSerialize: (value: SearchChunksResponse) => Buffer.from(SearchChunksResponse.encode(value).finish()), + responseDeserialize: (value: Buffer) => SearchChunksResponse.decode(value), }, /** Query the index for all chunks of a specific document */ - queryChunksByDocumentName: { - path: "/redactive.grpc.v1.Search/QueryChunksByDocumentName", + getDocument: { + path: "/redactive.grpc.v2.Search/GetDocument", requestStream: false, responseStream: false, - requestSerialize: (value: QueryByDocumentNameRequest) => - Buffer.from(QueryByDocumentNameRequest.encode(value).finish()), - requestDeserialize: (value: Buffer) => QueryByDocumentNameRequest.decode(value), - responseSerialize: (value: QueryByDocumentNameResponse) => - 
Buffer.from(QueryByDocumentNameResponse.encode(value).finish()), - responseDeserialize: (value: Buffer) => QueryByDocumentNameResponse.decode(value) + requestSerialize: (value: GetDocumentRequest) => Buffer.from(GetDocumentRequest.encode(value).finish()), + requestDeserialize: (value: Buffer) => GetDocumentRequest.decode(value), + responseSerialize: (value: GetDocumentResponse) => Buffer.from(GetDocumentResponse.encode(value).finish()), + responseDeserialize: (value: Buffer) => GetDocumentResponse.decode(value), }, - /** Get chunks by URL */ - getChunksByUrl: { - path: "/redactive.grpc.v1.Search/GetChunksByUrl", - requestStream: false, - responseStream: false, - requestSerialize: (value: GetChunksByUrlRequest) => Buffer.from(GetChunksByUrlRequest.encode(value).finish()), - requestDeserialize: (value: Buffer) => GetChunksByUrlRequest.decode(value), - responseSerialize: (value: GetChunksByUrlResponse) => Buffer.from(GetChunksByUrlResponse.encode(value).finish()), - responseDeserialize: (value: Buffer) => GetChunksByUrlResponse.decode(value) - } } as const; export interface SearchServer extends UntypedServiceImplementation { /** Query the index for relevant chunks */ - queryChunks: handleUnaryCall; + searchChunks: handleUnaryCall; /** Query the index for all chunks of a specific document */ - queryChunksByDocumentName: handleUnaryCall; - /** Get chunks by URL */ - getChunksByUrl: handleUnaryCall; + getDocument: handleUnaryCall; } export interface SearchClient extends Client { /** Query the index for relevant chunks */ - queryChunks( - request: QueryRequest, - callback: (error: ServiceError | null, response: QueryResponse) => void + searchChunks( + request: SearchChunksRequest, + callback: (error: ServiceError | null, response: SearchChunksResponse) => void, ): ClientUnaryCall; - queryChunks( - request: QueryRequest, + searchChunks( + request: SearchChunksRequest, metadata: Metadata, - callback: (error: ServiceError | null, response: QueryResponse) => void + callback: 
(error: ServiceError | null, response: SearchChunksResponse) => void, ): ClientUnaryCall; - queryChunks( - request: QueryRequest, + searchChunks( + request: SearchChunksRequest, metadata: Metadata, options: Partial, - callback: (error: ServiceError | null, response: QueryResponse) => void + callback: (error: ServiceError | null, response: SearchChunksResponse) => void, ): ClientUnaryCall; /** Query the index for all chunks of a specific document */ - queryChunksByDocumentName( - request: QueryByDocumentNameRequest, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ): ClientUnaryCall; - queryChunksByDocumentName( - request: QueryByDocumentNameRequest, - metadata: Metadata, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ): ClientUnaryCall; - queryChunksByDocumentName( - request: QueryByDocumentNameRequest, - metadata: Metadata, - options: Partial, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ): ClientUnaryCall; - /** Get chunks by URL */ - getChunksByUrl( - request: GetChunksByUrlRequest, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void + getDocument( + request: GetDocumentRequest, + callback: (error: ServiceError | null, response: GetDocumentResponse) => void, ): ClientUnaryCall; - getChunksByUrl( - request: GetChunksByUrlRequest, + getDocument( + request: GetDocumentRequest, metadata: Metadata, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void + callback: (error: ServiceError | null, response: GetDocumentResponse) => void, ): ClientUnaryCall; - getChunksByUrl( - request: GetChunksByUrlRequest, + getDocument( + request: GetDocumentRequest, metadata: Metadata, options: Partial, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void + callback: (error: ServiceError | null, response: GetDocumentResponse) => void, ): ClientUnaryCall; } -export const 
SearchClient = makeGenericClientConstructor(SearchService, "redactive.grpc.v1.Search") as unknown as { +export const SearchClient = makeGenericClientConstructor(SearchService, "redactive.grpc.v2.Search") as unknown as { new (address: string, credentials: ChannelCredentials, options?: Partial): SearchClient; service: typeof SearchService; serviceName: string; @@ -1010,19 +818,14 @@ export const SearchClient = makeGenericClientConstructor(SearchService, "redacti type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin - ? T - : T extends globalThis.Array - ? globalThis.Array> - : T extends ReadonlyArray - ? ReadonlyArray> - : T extends {} - ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin ? T + : T extends globalThis.Array ? globalThis.Array> + : T extends ReadonlyArray ? ReadonlyArray> + : T extends {} ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin - ? P +export type Exact = P extends Builtin ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function toTimestamp(date: Date): Timestamp { diff --git a/sdks/python/src/redactive/grpc/v2/__init__.py b/sdks/python/src/redactive/grpc/v2/__init__.py new file mode 100644 index 0000000..04e63d0 --- /dev/null +++ b/sdks/python/src/redactive/grpc/v2/__init__.py @@ -0,0 +1,327 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# sources: chunks.proto, search.proto +# plugin: python-betterproto +# This file has been @generated + +from dataclasses import dataclass +from datetime import datetime +from typing import ( + TYPE_CHECKING, + Dict, + List, + Optional, +) + +import betterproto +import betterproto.lib.google.protobuf as betterproto_lib_google_protobuf +import grpclib +from betterproto.grpc.grpclib_server import ServiceBase + + +if TYPE_CHECKING: + import grpclib.server + from betterproto.grpc.grpclib_client import MetadataLike + from grpclib.metadata import Deadline + + +@dataclass(eq=False, repr=False) +class ChunkMetadata(betterproto.Message): + created_at: Optional[datetime] = betterproto.message_field( + 1, optional=True, group="_created_at" + ) + """Chunk content's creation timestamp""" + + modified_at: Optional[datetime] = betterproto.message_field( + 2, optional=True, group="_modified_at" + ) + """Chunk content's last modified timestamp""" + + link: Optional[str] = betterproto.string_field(3, optional=True, group="_link") + + +@dataclass(eq=False, repr=False) +class SourceReference(betterproto.Message): + system: str = betterproto.string_field(1) + """ + Source system of the document e.g. confluence, slack, local_file_system + """ + + system_version: str = betterproto.string_field(2) + """Version of the source system e.g. 1.0.0""" + + connection_id: str = betterproto.string_field(3) + """ + Connection id to the source system e.g. confluence space id, slack channel + id, local file hostname + """ + + document_id: str = betterproto.string_field(4) + """ + Document id in the source system e.g. confluence page id, slack message id, + local file path + """ + + document_version: str = betterproto.string_field(5) + """ + Document version in the source system e.g. 
confluence page version, slack + message version, local file version hash + """ + + document_path: Optional[str] = betterproto.string_field( + 6, optional=True, group="_document_path" + ) + """ + Document path in the source system e.g. + "redactiveai.atlassian.net/Engineering/Onboarding Guide" or + "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding + Guide.pdf" + """ + + document_name: Optional[str] = betterproto.string_field( + 7, optional=True, group="_document_name" + ) + """Document name in the source system e.g. "document.txt""" + + +@dataclass(eq=False, repr=False) +class ChunkReference(betterproto.Message): + chunking_version: str = betterproto.string_field(1) + """Chunking version e.g. 1.0.0""" + + chunk_id: str = betterproto.string_field(2) + """ + chunk id is unique within the document, but not globally unique, it's + actually the index of the chunk in the document + """ + + chunk_hash: str = betterproto.string_field(3) + """SHA256 hash of the chunk body""" + + +@dataclass(eq=False, repr=False) +class RelevantChunk(betterproto.Message): + """A chunk is a part of a document""" + + source: "SourceReference" = betterproto.message_field(1) + """Source reference of the document""" + + chunk: "ChunkReference" = betterproto.message_field(2) + """Chunk reference of the chunk""" + + relevance: "RelevantChunkRelevance" = betterproto.message_field(3) + """Relevance of the chunk""" + + chunk_body: str = betterproto.string_field(4) + """Chunk body""" + + document_metadata: "ChunkMetadata" = betterproto.message_field(5) + """Document metadata""" + + +@dataclass(eq=False, repr=False) +class RelevantChunkRelevance(betterproto.Message): + similarity_score: float = betterproto.float_field(1) + """Similarity score of the chunk""" + + +@dataclass(eq=False, repr=False) +class Chunk(betterproto.Message): + """A chunk is a part of a document""" + + source: "SourceReference" = betterproto.message_field(1) + """Source reference of the document""" + + chunk: 
"ChunkReference" = betterproto.message_field(2) + """Chunk reference of the chunk""" + + chunk_body: str = betterproto.string_field(3) + """Chunk body""" + + document_metadata: "ChunkMetadata" = betterproto.message_field(4) + """Document metadata""" + + +@dataclass(eq=False, repr=False) +class Query(betterproto.Message): + semantic_query: Optional[str] = betterproto.string_field( + 1, optional=True, group="_semantic_query" + ) + """Search query for semantic content""" + + keyword_query: Optional[str] = betterproto.string_field( + 2, optional=True, group="_keyword_query" + ) + """Specific keywords to search for in source document""" + + +@dataclass(eq=False, repr=False) +class TimeSpan(betterproto.Message): + after: Optional[datetime] = betterproto.message_field( + 1, optional=True, group="_after" + ) + before: Optional[datetime] = betterproto.message_field( + 2, optional=True, group="_before" + ) + + +@dataclass(eq=False, repr=False) +class Filters(betterproto.Message): + scope: List[str] = betterproto.string_field(1) + """ + Scope of the query. This may either be the name of a fetcher, or a subspace + of documents. Subspaces take the form of :/// e.g. 
+ for Confluence: + 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding + Guide' for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared + Documents/Engineering/Onboarding Guide.pdf' + """ + + created: Optional["TimeSpan"] = betterproto.message_field( + 2, optional=True, group="_created" + ) + """Timespan of response chunk's creation""" + + modified: Optional["TimeSpan"] = betterproto.message_field( + 3, optional=True, group="_modified" + ) + """Timespan of response chunk's last modification""" + + user_emails: List[str] = betterproto.string_field(4) + """List of user emails associated with response chunk""" + + include_content_in_trash: Optional[bool] = betterproto.bool_field( + 5, optional=True, group="_include_content_in_trash" + ) + """Include content from documents in trash""" + + +@dataclass(eq=False, repr=False) +class SearchChunksRequest(betterproto.Message): + count: Optional[int] = betterproto.uint32_field(1, optional=True, group="_count") + """How many results to try to return (maximum number of results)""" + + query: "Query" = betterproto.message_field(2) + """The query to execute""" + + filters: Optional["Filters"] = betterproto.message_field( + 3, optional=True, group="_filters" + ) + """Filters to apply to query""" + + +@dataclass(eq=False, repr=False) +class GetDocumentRequest(betterproto.Message): + ref: str = betterproto.string_field(1) + """A reference to the document to retrieve""" + + filters: Optional["Filters"] = betterproto.message_field( + 2, optional=True, group="_filters" + ) + """Query filters (only really for GetDocByTitle)""" + + +@dataclass(eq=False, repr=False) +class SearchChunksResponse(betterproto.Message): + success: bool = betterproto.bool_field(1) + """Query was successful""" + + error: Optional["betterproto_lib_google_protobuf.Struct"] = ( + betterproto.message_field(2, optional=True, group="_error") + ) + """Error message if query failed""" + + relevant_chunks: List["RelevantChunk"] = 
betterproto.message_field(3) + """List of relevant chunks""" + + +@dataclass(eq=False, repr=False) +class GetDocumentResponse(betterproto.Message): + success: bool = betterproto.bool_field(1) + """Query was successful""" + + error: Optional["betterproto_lib_google_protobuf.Struct"] = ( + betterproto.message_field(2, optional=True, group="_error") + ) + """Error message if query failed""" + + chunks: List["Chunk"] = betterproto.message_field(3) + """List of relevant chunks""" + + +class SearchStub(betterproto.ServiceStub): + async def search_chunks( + self, + search_chunks_request: "SearchChunksRequest", + *, + timeout: Optional[float] = None, + deadline: Optional["Deadline"] = None, + metadata: Optional["MetadataLike"] = None + ) -> "SearchChunksResponse": + return await self._unary_unary( + "/redactive.grpc.v2.Search/SearchChunks", + search_chunks_request, + SearchChunksResponse, + timeout=timeout, + deadline=deadline, + metadata=metadata, + ) + + async def get_document( + self, + get_document_request: "GetDocumentRequest", + *, + timeout: Optional[float] = None, + deadline: Optional["Deadline"] = None, + metadata: Optional["MetadataLike"] = None + ) -> "GetDocumentResponse": + return await self._unary_unary( + "/redactive.grpc.v2.Search/GetDocument", + get_document_request, + GetDocumentResponse, + timeout=timeout, + deadline=deadline, + metadata=metadata, + ) + + +class SearchBase(ServiceBase): + + async def search_chunks( + self, search_chunks_request: "SearchChunksRequest" + ) -> "SearchChunksResponse": + raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) + + async def get_document( + self, get_document_request: "GetDocumentRequest" + ) -> "GetDocumentResponse": + raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) + + async def __rpc_search_chunks( + self, stream: "grpclib.server.Stream[SearchChunksRequest, SearchChunksResponse]" + ) -> None: + request = await stream.recv_message() + response = await self.search_chunks(request) + await 
stream.send_message(response) + + async def __rpc_get_document( + self, stream: "grpclib.server.Stream[GetDocumentRequest, GetDocumentResponse]" + ) -> None: + request = await stream.recv_message() + response = await self.get_document(request) + await stream.send_message(response) + + def __mapping__(self) -> Dict[str, grpclib.const.Handler]: + return { + "/redactive.grpc.v2.Search/SearchChunks": grpclib.const.Handler( + self.__rpc_search_chunks, + grpclib.const.Cardinality.UNARY_UNARY, + SearchChunksRequest, + SearchChunksResponse, + ), + "/redactive.grpc.v2.Search/GetDocument": grpclib.const.Handler( + self.__rpc_get_document, + grpclib.const.Cardinality.UNARY_UNARY, + GetDocumentRequest, + GetDocumentResponse, + ), + } From 64c735ef728586760a05aa63587ea20b4726300f Mon Sep 17 00:00:00 2001 From: Angus White Date: Thu, 14 Nov 2024 16:44:42 +1100 Subject: [PATCH 02/12] Implement node sdk --- protos/chunks.v1.proto | 70 ---- protos/search.v1.proto | 93 ----- sdks/node/src/multiUserClient.ts | 42 +-- sdks/node/src/searchClient.ts | 92 ++--- sdks/python/src/redactive/grpc/v1/__init__.py | 344 ------------------ sdks/python/src/redactive/search_client.py | 2 +- 6 files changed, 66 insertions(+), 577 deletions(-) delete mode 100644 protos/chunks.v1.proto delete mode 100644 protos/search.v1.proto delete mode 100644 sdks/python/src/redactive/grpc/v1/__init__.py diff --git a/protos/chunks.v1.proto b/protos/chunks.v1.proto deleted file mode 100644 index fead00f..0000000 --- a/protos/chunks.v1.proto +++ /dev/null @@ -1,70 +0,0 @@ -syntax = "proto3"; - -package redactive.grpc.v1; - -import "google/protobuf/timestamp.proto"; - -message ChunkMetadata { - // Chunk content's creation timestamp - optional google.protobuf.Timestamp created_at = 1; - // Chunk content's last modified timestamp - optional google.protobuf.Timestamp modified_at = 2; - optional string link = 3; -} - -message SourceReference { - // Source system of the document e.g. 
confluence, slack, google-drive - string system = 1; - // Version of the source system e.g. 1.0.0 - string system_version = 2; - // Connection id to the source system e.g. confluence space id, slack channel id, google-drive drive id - string connection_id = 3; - // Document id in the source system e.g. confluence page id, slack message id, google-drive document id - string document_id = 4; - // Document version in the source system e.g. confluence page version, slack message version, google-drive document version - string document_version = 5; - // Document path in the source system e.g. "My Drive/document.txt", "slack-channel-name" - optional string document_path = 6; - // Document name in the source system e.g. "document.txt" - optional string document_name = 7; -} - -message ChunkReference { - // Chunking version e.g. 1.0.0 - string chunking_version = 1; - // chunk id is unique within the document, but not globally unique. - string chunk_id = 2; - // SHA256 hash of the chunk body - string chunk_hash = 3; -} - -// A chunk is a part of a document -message RelevantChunk { - message Relevance { - // Similarity score of the chunk - float similarity_score = 1; - } - - // Source reference of the document - SourceReference source = 1; - // Chunk reference of the chunk - ChunkReference chunk = 2; - // Relevance of the chunk - Relevance relevance = 3; - // Chunk body - string chunk_body = 4; - // Document metadata - ChunkMetadata document_metadata = 5; -} - -// A chunk is a part of a document -message Chunk { - // Source reference of the document - SourceReference source = 1; - // Chunk reference of the chunk - ChunkReference chunk = 2; - // Chunk body - string chunk_body = 3; - // Document metadata - ChunkMetadata document_metadata = 4; -} diff --git a/protos/search.v1.proto b/protos/search.v1.proto deleted file mode 100644 index 92a853a..0000000 --- a/protos/search.v1.proto +++ /dev/null @@ -1,93 +0,0 @@ -syntax = "proto3"; - -package redactive.grpc.v1; - -import 
"google/protobuf/struct.proto"; -import "google/protobuf/timestamp.proto"; - -import "chunks.proto"; - -service Search { - // Query the index for relevant chunks - rpc QueryChunks(QueryRequest) returns (QueryResponse); - // Query the index for all chunks of a specific document - rpc QueryChunksByDocumentName(QueryByDocumentNameRequest) returns (QueryByDocumentNameResponse); - // Get chunks by URL - rpc GetChunksByUrl(GetChunksByUrlRequest) returns (GetChunksByUrlResponse); -} - -message Query { - // Semantic query to execute - string semantic_query = 1; -} - -message DocumentNameQuery { - // Document name to search for - string document_name = 1; -} - -message TimeSpan { - optional google.protobuf.Timestamp after = 1; - optional google.protobuf.Timestamp before = 2; -} - -message Filters { - // Scope e.g. "confluence", "slack://channel-name", "google-drive://CompanyDrive/document.docx" - repeated string scope = 1; - // Timespan of response chunk's creation - optional TimeSpan created = 2; - // Timespan of response chunk's last modification - optional TimeSpan modified = 3; - // List of user emails associated with response chunk - repeated string user_emails = 4; - // Include content from documents in trash - optional bool include_content_in_trash = 5; -} - -message QueryRequest { - // How many results to try to return (maximum number of results) - optional uint32 count = 1; - // The query to execute - Query query = 2; - // Filters to apply to query - optional Filters filters = 3; -} - -message QueryResponse { - // Query was successful - bool success = 1; - // Error message if query failed - optional google.protobuf.Struct error = 2; - // List of relevant chunks - repeated RelevantChunk relevant_chunks = 3; -} - -message GetChunksByUrlRequest { - // URL to document - string url = 1; -} - -message GetChunksByUrlResponse { - // Fetch was successful - bool success = 1; - // Error message if fetch failed - optional google.protobuf.Struct error = 2; - // List of chunks - 
repeated Chunk chunks = 3; -} - -message QueryByDocumentNameRequest { - // The query to execute - DocumentNameQuery query = 2; - // Filters to apply to query - optional Filters filters = 3; -} - -message QueryByDocumentNameResponse { - // Query was successful - bool success = 1; - // Error message if query failed - optional google.protobuf.Struct error = 2; - // List of relevant chunks - repeated Chunk chunks = 3; -} diff --git a/sdks/node/src/multiUserClient.ts b/sdks/node/src/multiUserClient.ts index ee4baa4..75df4f2 100644 --- a/sdks/node/src/multiUserClient.ts +++ b/sdks/node/src/multiUserClient.ts @@ -3,9 +3,8 @@ import { randomUUID } from "node:crypto"; import { AuthClient } from "./authClient"; import { Chunk, RelevantChunk } from "./grpc/chunks"; import { - GetChunksByUrlSearchParams, - QueryChunksByDocumentNameSearchParams, - QueryChunksSearchParams, + SearchChunksParams, + GetDocumentParams, SearchClient } from "./searchClient"; @@ -17,14 +16,11 @@ export interface UserData { connections?: string[]; } -export interface QueryChunksParams extends Omit { - userId: string; -} -export interface QueryChunksByDocumentNameParams extends Omit { +export interface MultiUserSearchChunksParams extends Omit { userId: string; } -export interface GetChunksByUrlParams extends Omit { +export interface MultiUserGetDocumentParams extends Omit { userId: string; } @@ -154,12 +150,12 @@ export class MultiUserClient { /** * Query for relevant chunks based on a semantic query. * @param userId - The ID of the user. - * @param semanticQuery - The query string used to find relevant chunks. + * @param query - The query string used to find relevant chunks. * @param count - The number of relevant chunks to retrieve. Defaults to 10. * @param filters - An object of filters for querying. Optional. * @returns list of relevant chunks. 
*/ - async queryChunks({ userId, semanticQuery, count = 10, filters }: QueryChunksParams): Promise { + async searchChunksBySemantics({ userId, query, count = 10, filters }: MultiUserSearchChunksParams): Promise { let userData = await this.readUserData(userId); if (!userData || !userData.refreshToken) { throw new Error(`No valid Redactive session for user '${userId}'`); @@ -168,21 +164,18 @@ export class MultiUserClient { userData = await this._refreshUserData(userId, userData.refreshToken, undefined); } - return await this.searchClient.queryChunks({ accessToken: userData.idToken!, semanticQuery, count, filters }); + return await this.searchClient.searchChunksBySemantics({ accessToken: userData.idToken!, query, count, filters }); } /** - * Query for chunks by document name. + * Query for relevant chunks containing the provided keywords * @param userId - The ID of the user. - * @param documentName - The name of the document to retrieve chunks. - * @param filters - The filters for querying documents. Optional. - * @returns The complete list of chunks for the matching document. + * @param query - The query string used to find relevant chunks. + * @param count - The number of relevant chunks to retrieve. Defaults to 10. + * @param filters - An object of filters for querying. Optional. + * @returns list of relevant chunks. 
*/ - async queryChunksByDocumentName({ - userId, - documentName, - filters - }: QueryChunksByDocumentNameParams): Promise { + async searchChunksByKeyword({ userId, query, count = 10, filters }: MultiUserSearchChunksParams): Promise { let userData = await this.readUserData(userId); if (!userData || !userData.refreshToken) { throw new Error(`No valid Redactive session for user '${userId}'`); @@ -191,16 +184,17 @@ export class MultiUserClient { userData = await this._refreshUserData(userId, userData.refreshToken, undefined); } - return await this.searchClient.queryChunksByDocumentName({ accessToken: userData.idToken!, documentName, filters }); + return await this.searchClient.searchChunksByKeyword({ accessToken: userData.idToken!, query, count, filters }); } /** * Get chunks from a document by its URL. * @param accessToken - The user's Redactive access token. - * @param url - The URL to the document for retrieving chunks. + * @param ref - A reference to the document we are retrieving. Can be either a url or document name. + * @param filters - The filters for querying documents. Optional. Only applicable for getting by document name. * @returns The complete list of chunks for the matching document. 
*/ - async getChunksByUrl({ userId, url }: GetChunksByUrlParams): Promise { + async getDocument({ userId, ref, filters }: MultiUserGetDocumentParams): Promise { let userData = await this.readUserData(userId); if (!userData || !userData.refreshToken) { throw new Error(`No valid Redactive session for user '${userId}'`); @@ -209,6 +203,6 @@ export class MultiUserClient { userData = await this._refreshUserData(userId, userData.refreshToken, undefined); } - return await this.searchClient.getChunksByUrl({ accessToken: userData.idToken!, url }); + return await this.searchClient.getDocument({ accessToken: userData.idToken!, ref, filters}); } } diff --git a/sdks/node/src/searchClient.ts b/sdks/node/src/searchClient.ts index 3de55f1..5584380 100644 --- a/sdks/node/src/searchClient.ts +++ b/sdks/node/src/searchClient.ts @@ -2,36 +2,28 @@ import { Client, credentials, Metadata } from "@grpc/grpc-js"; import { Chunk, RelevantChunk } from "./grpc/chunks"; import { - DocumentNameQuery, Filters, - GetChunksByUrlRequest, - GetChunksByUrlResponse, + GetDocumentRequest, + GetDocumentResponse, Query, - QueryByDocumentNameRequest, - QueryByDocumentNameResponse, - QueryRequest, - QueryResponse, + SearchChunksRequest, + SearchChunksResponse, SearchClient as SearchServiceClient } from "./grpc/search"; -export interface QueryChunksSearchParams { +export interface SearchChunksParams { accessToken: string; - semanticQuery: string; + query: string; count?: number; filters?: Partial; } -export interface QueryChunksByDocumentNameSearchParams { +export interface GetDocumentParams { accessToken: string; - documentName: string; + ref: string; filters?: Partial; } -export interface GetChunksByUrlSearchParams { - accessToken: string; - url: string; -} - export class SearchClient { host: string = "grpc.redactive.ai"; port: number = 443; @@ -63,32 +55,32 @@ export class SearchClient { /** * Query for relevant chunks based on a semantic query. * @param accessToken - The user's Redactive access token. 
- * @param semanticQuery - The query string used to find relevant chunks. + * @param query - The query string used to find relevant chunks. * @param count - The number of relevant chunks to retrieve. Defaults to 10. * @param filters - An object of filters for querying. Optional. * @returns list of relevant chunks. */ - async queryChunks({ + async searchChunksBySemantics({ accessToken, - semanticQuery, + query, count = 10, filters - }: QueryChunksSearchParams): Promise { + }: SearchChunksParams): Promise { const requestMetadata = new Metadata(); requestMetadata.set("Authorization", `Bearer ${accessToken}`); requestMetadata.set("User-Agent", "redactive-sdk-node"); const client = this._getClient(SearchServiceClient.serviceName) as SearchServiceClient; - const query: Query = { semanticQuery }; + const query_obj: Query = { semanticQuery: query }; const _filters: Filters = { scope: [], userEmails: [], ...filters }; - const queryRequest: QueryRequest = { - query, + const searchRequest: SearchChunksRequest = { + query: query_obj, count, filters: filters ? _filters : undefined }; - const response = await new Promise((resolve, reject) => { - client.queryChunks(queryRequest, requestMetadata, (err, response) => { + const response = await new Promise((resolve, reject) => { + client.searchChunks(searchRequest, requestMetadata, (err, response) => { if (err) { reject(err); return; @@ -101,31 +93,34 @@ export class SearchClient { } /** - * Query for chunks by document name. + * Query for relevant chunks based on keywords. * @param accessToken - The user's Redactive access token. - * @param documentName - The name of the document to retrieve chunks. - * @param filters - The filters for querying documents. Optional. - * @returns The complete list of chunks for the matching document. + * @param query - The query string used to find relevant chunks. + * @param count - The number of relevant chunks to retrieve. Defaults to 10. + * @param filters - An object of filters for querying. 
Optional. + * @returns list of relevant chunks. */ - async queryChunksByDocumentName({ + async searchChunksByKeyword({ accessToken, - documentName, + query, + count = 10, filters - }: QueryChunksByDocumentNameSearchParams): Promise { + }: SearchChunksParams): Promise { const requestMetadata = new Metadata(); requestMetadata.set("Authorization", `Bearer ${accessToken}`); requestMetadata.set("User-Agent", "redactive-sdk-node"); const client = this._getClient(SearchServiceClient.serviceName) as SearchServiceClient; - const query: DocumentNameQuery = { documentName }; + const query_obj: Query = { keywordQuery: query }; const _filters: Filters = { scope: [], userEmails: [], ...filters }; - const queryRequest: QueryByDocumentNameRequest = { - query, + const searchRequest: SearchChunksRequest = { + query: query_obj, + count, filters: filters ? _filters : undefined }; - const response = await new Promise((resolve, reject) => { - client.queryChunksByDocumentName(queryRequest, requestMetadata, (err, response) => { + const response = await new Promise((resolve, reject) => { + client.searchChunks(searchRequest, requestMetadata, (err, response) => { if (err) { reject(err); return; @@ -134,27 +129,34 @@ export class SearchClient { return resolve(response); }); }); - return response.chunks; + return response.relevantChunks; } /** - * Get chunks from a document by its URL. + * Get chunks for a document via a specific reference * @param accessToken - The user's Redactive access token. - * @param url - The URL to the document for retrieving chunks. + * @param ref - A reference to the document to retrieve. Can be either a url or document name. + * @param filters - The filters for querying documents. Optional. Only applicable for getting by document name. * @returns The complete list of chunks for the matching document. 
*/ - async getChunksByUrl({ accessToken, url }: GetChunksByUrlSearchParams): Promise { + async getDocument({ + accessToken, + ref, + filters + }: GetDocumentParams): Promise { const requestMetadata = new Metadata(); requestMetadata.set("Authorization", `Bearer ${accessToken}`); requestMetadata.set("User-Agent", "redactive-sdk-node"); const client = this._getClient(SearchServiceClient.serviceName) as SearchServiceClient; - const queryRequest: GetChunksByUrlRequest = { - url + const _filters: Filters = { scope: [], userEmails: [], ...filters }; + const queryRequest: GetDocumentRequest = { + ref, + filters: filters ? _filters : undefined }; - const response = await new Promise((resolve, reject) => { - client.getChunksByUrl(queryRequest, requestMetadata, (err, response) => { + const response = await new Promise((resolve, reject) => { + client.getDocument(queryRequest, requestMetadata, (err, response) => { if (err) { reject(err); return; diff --git a/sdks/python/src/redactive/grpc/v1/__init__.py b/sdks/python/src/redactive/grpc/v1/__init__.py deleted file mode 100644 index a5a75fb..0000000 --- a/sdks/python/src/redactive/grpc/v1/__init__.py +++ /dev/null @@ -1,344 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# sources: chunks.proto, search.proto -# plugin: python-betterproto -# This file has been @generated - -from dataclasses import dataclass -from datetime import datetime -from typing import ( - TYPE_CHECKING, - Dict, - List, - Optional, -) - -import betterproto -import betterproto.lib.google.protobuf as betterproto_lib_google_protobuf -import grpclib -from betterproto.grpc.grpclib_server import ServiceBase - -if TYPE_CHECKING: - import grpclib.server - from betterproto.grpc.grpclib_client import MetadataLike - from grpclib.metadata import Deadline - - -@dataclass(eq=False, repr=False) -class ChunkMetadata(betterproto.Message): - created_at: Optional[datetime] = betterproto.message_field(1, optional=True, group="_created_at") - """Chunk content's creation timestamp""" - - modified_at: Optional[datetime] = betterproto.message_field(2, optional=True, group="_modified_at") - """Chunk content's last modified timestamp""" - - link: Optional[str] = betterproto.string_field(3, optional=True, group="_link") - - -@dataclass(eq=False, repr=False) -class SourceReference(betterproto.Message): - system: str = betterproto.string_field(1) - """Source system of the document e.g. confluence, slack, google-drive""" - - system_version: str = betterproto.string_field(2) - """Version of the source system e.g. 1.0.0""" - - connection_id: str = betterproto.string_field(3) - """ - Connection id to the source system e.g. confluence space id, slack channel - id, google-drive drive id - """ - - document_id: str = betterproto.string_field(4) - """ - Document id in the source system e.g. confluence page id, slack message id, - google-drive document id - """ - - document_version: str = betterproto.string_field(5) - """ - Document version in the source system e.g. confluence page version, slack - message version, google-drive document version - """ - - document_path: Optional[str] = betterproto.string_field(6, optional=True, group="_document_path") - """ - Document path in the source system e.g. 
"My Drive/document.txt", "slack- - channel-name" - """ - - document_name: Optional[str] = betterproto.string_field(7, optional=True, group="_document_name") - """Document name in the source system e.g. "document.txt""" - - -@dataclass(eq=False, repr=False) -class ChunkReference(betterproto.Message): - chunking_version: str = betterproto.string_field(1) - """Chunking version e.g. 1.0.0""" - - chunk_id: str = betterproto.string_field(2) - """chunk id is unique within the document, but not globally unique.""" - - chunk_hash: str = betterproto.string_field(3) - """SHA256 hash of the chunk body""" - - -@dataclass(eq=False, repr=False) -class RelevantChunk(betterproto.Message): - """A chunk is a part of a document""" - - source: "SourceReference" = betterproto.message_field(1) - """Source reference of the document""" - - chunk: "ChunkReference" = betterproto.message_field(2) - """Chunk reference of the chunk""" - - relevance: "RelevantChunkRelevance" = betterproto.message_field(3) - """Relevance of the chunk""" - - chunk_body: str = betterproto.string_field(4) - """Chunk body""" - - document_metadata: "ChunkMetadata" = betterproto.message_field(5) - """Document metadata""" - - -@dataclass(eq=False, repr=False) -class RelevantChunkRelevance(betterproto.Message): - similarity_score: float = betterproto.float_field(1) - """Similarity score of the chunk""" - - -@dataclass(eq=False, repr=False) -class Chunk(betterproto.Message): - """A chunk is a part of a document""" - - source: "SourceReference" = betterproto.message_field(1) - """Source reference of the document""" - - chunk: "ChunkReference" = betterproto.message_field(2) - """Chunk reference of the chunk""" - - chunk_body: str = betterproto.string_field(3) - """Chunk body""" - - document_metadata: "ChunkMetadata" = betterproto.message_field(4) - """Document metadata""" - - -@dataclass(eq=False, repr=False) -class Query(betterproto.Message): - semantic_query: str = betterproto.string_field(1) - """Semantic query to 
execute""" - - -@dataclass(eq=False, repr=False) -class DocumentNameQuery(betterproto.Message): - document_name: str = betterproto.string_field(1) - """Document name to search for""" - - -@dataclass(eq=False, repr=False) -class TimeSpan(betterproto.Message): - after: Optional[datetime] = betterproto.message_field(1, optional=True, group="_after") - before: Optional[datetime] = betterproto.message_field(2, optional=True, group="_before") - - -@dataclass(eq=False, repr=False) -class Filters(betterproto.Message): - scope: List[str] = betterproto.string_field(1) - """ - Scope e.g. "confluence", "slack://channel-name", "google- - drive://CompanyDrive/document.docx" - """ - - created: Optional["TimeSpan"] = betterproto.message_field(2, optional=True, group="_created") - """Timespan of response chunk's creation""" - - modified: Optional["TimeSpan"] = betterproto.message_field(3, optional=True, group="_modified") - """Timespan of response chunk's last modification""" - - user_emails: List[str] = betterproto.string_field(4) - """List of user emails associated with response chunk""" - - include_content_in_trash: Optional[bool] = betterproto.bool_field( - 5, optional=True, group="_include_content_in_trash" - ) - """Include content from documents in trash""" - - -@dataclass(eq=False, repr=False) -class QueryRequest(betterproto.Message): - count: Optional[int] = betterproto.uint32_field(1, optional=True, group="_count") - """How many results to try to return (maximum number of results)""" - - query: "Query" = betterproto.message_field(2) - """The query to execute""" - - filters: Optional["Filters"] = betterproto.message_field(3, optional=True, group="_filters") - """Filters to apply to query""" - - -@dataclass(eq=False, repr=False) -class QueryResponse(betterproto.Message): - success: bool = betterproto.bool_field(1) - """Query was successful""" - - error: Optional["betterproto_lib_google_protobuf.Struct"] = betterproto.message_field( - 2, optional=True, group="_error" - ) - 
"""Error message if query failed""" - - relevant_chunks: List["RelevantChunk"] = betterproto.message_field(3) - """List of relevant chunks""" - - -@dataclass(eq=False, repr=False) -class GetChunksByUrlRequest(betterproto.Message): - url: str = betterproto.string_field(1) - """URL to document""" - - -@dataclass(eq=False, repr=False) -class GetChunksByUrlResponse(betterproto.Message): - success: bool = betterproto.bool_field(1) - """Fetch was successful""" - - error: Optional["betterproto_lib_google_protobuf.Struct"] = betterproto.message_field( - 2, optional=True, group="_error" - ) - """Error message if fetch failed""" - - chunks: List["Chunk"] = betterproto.message_field(3) - """List of chunks""" - - -@dataclass(eq=False, repr=False) -class QueryByDocumentNameRequest(betterproto.Message): - query: "DocumentNameQuery" = betterproto.message_field(2) - """The query to execute""" - - filters: Optional["Filters"] = betterproto.message_field(3, optional=True, group="_filters") - """Filters to apply to query""" - - -@dataclass(eq=False, repr=False) -class QueryByDocumentNameResponse(betterproto.Message): - success: bool = betterproto.bool_field(1) - """Query was successful""" - - error: Optional["betterproto_lib_google_protobuf.Struct"] = betterproto.message_field( - 2, optional=True, group="_error" - ) - """Error message if query failed""" - - chunks: List["Chunk"] = betterproto.message_field(3) - """List of relevant chunks""" - - -class SearchStub(betterproto.ServiceStub): - async def query_chunks( - self, - query_request: "QueryRequest", - *, - timeout: Optional[float] = None, - deadline: Optional["Deadline"] = None, - metadata: Optional["MetadataLike"] = None, - ) -> "QueryResponse": - return await self._unary_unary( - "/redactive.grpc.v1.Search/QueryChunks", - query_request, - QueryResponse, - timeout=timeout, - deadline=deadline, - metadata=metadata, - ) - - async def query_chunks_by_document_name( - self, - query_by_document_name_request: 
"QueryByDocumentNameRequest", - *, - timeout: Optional[float] = None, - deadline: Optional["Deadline"] = None, - metadata: Optional["MetadataLike"] = None, - ) -> "QueryByDocumentNameResponse": - return await self._unary_unary( - "/redactive.grpc.v1.Search/QueryChunksByDocumentName", - query_by_document_name_request, - QueryByDocumentNameResponse, - timeout=timeout, - deadline=deadline, - metadata=metadata, - ) - - async def get_chunks_by_url( - self, - get_chunks_by_url_request: "GetChunksByUrlRequest", - *, - timeout: Optional[float] = None, - deadline: Optional["Deadline"] = None, - metadata: Optional["MetadataLike"] = None, - ) -> "GetChunksByUrlResponse": - return await self._unary_unary( - "/redactive.grpc.v1.Search/GetChunksByUrl", - get_chunks_by_url_request, - GetChunksByUrlResponse, - timeout=timeout, - deadline=deadline, - metadata=metadata, - ) - - -class SearchBase(ServiceBase): - async def query_chunks(self, query_request: "QueryRequest") -> "QueryResponse": - raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) - - async def query_chunks_by_document_name( - self, query_by_document_name_request: "QueryByDocumentNameRequest" - ) -> "QueryByDocumentNameResponse": - raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) - - async def get_chunks_by_url(self, get_chunks_by_url_request: "GetChunksByUrlRequest") -> "GetChunksByUrlResponse": - raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) - - async def __rpc_query_chunks(self, stream: "grpclib.server.Stream[QueryRequest, QueryResponse]") -> None: - request = await stream.recv_message() - response = await self.query_chunks(request) - await stream.send_message(response) - - async def __rpc_query_chunks_by_document_name( - self, - stream: "grpclib.server.Stream[QueryByDocumentNameRequest, QueryByDocumentNameResponse]", - ) -> None: - request = await stream.recv_message() - response = await self.query_chunks_by_document_name(request) - await stream.send_message(response) - - async def 
__rpc_get_chunks_by_url( - self, - stream: "grpclib.server.Stream[GetChunksByUrlRequest, GetChunksByUrlResponse]", - ) -> None: - request = await stream.recv_message() - response = await self.get_chunks_by_url(request) - await stream.send_message(response) - - def __mapping__(self) -> Dict[str, grpclib.const.Handler]: - return { - "/redactive.grpc.v1.Search/QueryChunks": grpclib.const.Handler( - self.__rpc_query_chunks, - grpclib.const.Cardinality.UNARY_UNARY, - QueryRequest, - QueryResponse, - ), - "/redactive.grpc.v1.Search/QueryChunksByDocumentName": grpclib.const.Handler( - self.__rpc_query_chunks_by_document_name, - grpclib.const.Cardinality.UNARY_UNARY, - QueryByDocumentNameRequest, - QueryByDocumentNameResponse, - ), - "/redactive.grpc.v1.Search/GetChunksByUrl": grpclib.const.Handler( - self.__rpc_get_chunks_by_url, - grpclib.const.Cardinality.UNARY_UNARY, - GetChunksByUrlRequest, - GetChunksByUrlResponse, - ), - } diff --git a/sdks/python/src/redactive/search_client.py b/sdks/python/src/redactive/search_client.py index 9a02916..2955fc4 100644 --- a/sdks/python/src/redactive/search_client.py +++ b/sdks/python/src/redactive/search_client.py @@ -5,7 +5,7 @@ from grpclib.client import Channel from redactive._connection_mode import get_default_grpc_host_and_port as _get_default_grpc_host_and_port -from redactive.grpc.v1 import ( +from redactive.grpc.v2 import ( Chunk, DocumentNameQuery, Filters, From 400f3c8acee9c29300a63d1e7e97e2a55d0d8378 Mon Sep 17 00:00:00 2001 From: Angus White Date: Fri, 15 Nov 2024 14:49:28 +1100 Subject: [PATCH 03/12] Fix node SDK unit tests --- sdks/node/src/multiUserClient.test.ts | 35 +++++++------- sdks/node/src/searchClient.test.ts | 66 +++++++++++++++------------ 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/sdks/node/src/multiUserClient.test.ts b/sdks/node/src/multiUserClient.test.ts index bb759f7..dc57963 100644 --- a/sdks/node/src/multiUserClient.test.ts +++ b/sdks/node/src/multiUserClient.test.ts @@ -137,18 
+137,18 @@ describe("MultiUserClient", () => { it("should throw an error if no valid session when querying chunks", async () => { const userId = "user123"; - const semanticQuery = "query"; + const query = "query"; readUserData.mockResolvedValue(undefined); - await expect(multiUserClient.queryChunks({ userId, semanticQuery })).rejects.toThrow( + await expect(multiUserClient.searchChunksBySemantics({ userId, query })).rejects.toThrow( `No valid Redactive session for user '${userId}'` ); }); - it("should query chunks after refreshing idToken", async () => { + it("should query chunks semantically after refreshing idToken", async () => { const userId = "user123"; - const semanticQuery = "query"; + const query = "query"; const idToken = "idToken123"; const refreshToken = "refreshToken123"; const chunks = [{ chunk: "chunk1" }, { chunk: "chunk2" }]; @@ -166,18 +166,18 @@ describe("MultiUserClient", () => { readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.queryChunks.mockResolvedValue(chunks as unknown as RelevantChunk[]); + mockSearchClient.searchChunksBySemantics.mockResolvedValue(chunks as unknown as RelevantChunk[]); multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.queryChunks({ userId, semanticQuery }); + const result = await multiUserClient.searchChunksBySemantics({ userId, query }); expect(result).toEqual(chunks); - expect(mockSearchClient.queryChunks).toHaveBeenCalledWith({ accessToken: idToken, semanticQuery, count: 10 }); + expect(mockSearchClient.searchChunksBySemantics).toHaveBeenCalledWith({ accessToken: idToken, query, count: 10 }); }); - it("should query chunks by document name after refreshing idToken", async () => { + it("should query chunks by keyword after refreshing idToken", async () => { const userId = "user123"; - const documentName = "test-document"; + const query = 
"query"; const idToken = "idToken123"; const refreshToken = "refreshToken123"; const chunks = [{ chunk: "chunk1" }, { chunk: "chunk2" }]; @@ -195,18 +195,18 @@ describe("MultiUserClient", () => { readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.queryChunksByDocumentName.mockResolvedValue(chunks as unknown as Chunk[]); + mockSearchClient.searchChunksByKeyword.mockResolvedValue(chunks as unknown as RelevantChunk[]); multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.queryChunksByDocumentName({ userId, documentName }); + const result = await multiUserClient.searchChunksByKeyword({ userId, query }); expect(result).toEqual(chunks); - expect(mockSearchClient.queryChunksByDocumentName).toHaveBeenCalledWith({ accessToken: idToken, documentName }); + expect(mockSearchClient.searchChunksByKeyword).toHaveBeenCalledWith({ accessToken: idToken, query, count: 10 }); }); - it("should get chunks by url after refreshing idToken", async () => { + it("should query chunks by document ref after refreshing idToken", async () => { const userId = "user123"; - const url = "https://example.com"; + const documentName = "test-document"; const idToken = "idToken123"; const refreshToken = "refreshToken123"; const chunks = [{ chunk: "chunk1" }, { chunk: "chunk2" }]; @@ -224,12 +224,13 @@ describe("MultiUserClient", () => { readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.getChunksByUrl.mockResolvedValue(chunks as unknown as Chunk[]); + mockSearchClient.getDocument.mockResolvedValue(chunks as unknown as Chunk[]); multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.getChunksByUrl({ userId, url }); + const result = await 
multiUserClient.getDocument({ userId, ref: documentName }); expect(result).toEqual(chunks); - expect(mockSearchClient.getChunksByUrl).toHaveBeenCalledWith({ accessToken: idToken, url }); + expect(mockSearchClient.getDocument).toHaveBeenCalledWith({ accessToken: idToken, ref: documentName }); }); + }); diff --git a/sdks/node/src/searchClient.test.ts b/sdks/node/src/searchClient.test.ts index 03a8abe..b453727 100644 --- a/sdks/node/src/searchClient.test.ts +++ b/sdks/node/src/searchClient.test.ts @@ -4,12 +4,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { Chunk, ChunkReference, RelevantChunk, RelevantChunk_Relevance, SourceReference } from "./grpc/chunks"; import { Filters, - GetChunksByUrlRequest, - GetChunksByUrlResponse, - QueryByDocumentNameRequest, - QueryByDocumentNameResponse, - QueryRequest, - QueryResponse, + GetDocumentRequest, + GetDocumentResponse, + SearchChunksRequest, + SearchChunksResponse, SearchClient as SearchServiceClient } from "./grpc/search"; import { SearchClient } from "./searchClient"; @@ -21,9 +19,9 @@ describe("Service client", () => { vi.clearAllMocks(); }); - it("should query chunks by document name", async () => { + it("should get chunks by document reference", async () => { const accessToken = "test-accessToken"; - const documentName = "test-documentName"; + const ref = "test-documentName"; const filters: Partial = { scope: ["dataprovider"], created: { before: new Date() }, @@ -55,21 +53,21 @@ describe("Service client", () => { // Mock the _getClient method of SearchClient to return a mock gRPC client vi.spyOn(SearchClient.prototype, "_getClient").mockReturnValue({ - queryChunksByDocumentName: ( - _request: QueryByDocumentNameRequest, + getDocument: ( + _request: GetDocumentRequest, _metadata: Metadata, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ) => callback(null, { success: true, chunks: expectedResponse } as QueryByDocumentNameResponse) + 
callback: (error: ServiceError | null, response: GetDocumentResponse) => void + ) => callback(null, { success: true, chunks: expectedResponse } as GetDocumentResponse) } as unknown as SearchServiceClient); const client = new SearchClient(); - const response = await client.queryChunksByDocumentName({ accessToken, documentName, filters }); + const response = await client.getDocument({ accessToken, ref, filters }); expect(response).toStrictEqual(expectedResponse); }); - it("should query chunks", async () => { + it("should search chunks semantically", async () => { const accessToken = "test-accessToken"; const query = "test-query"; const count = 1; @@ -107,27 +105,34 @@ describe("Service client", () => { // Mock the _getClient method of SearchClient to return a mock gRPC client vi.spyOn(SearchClient.prototype, "_getClient").mockReturnValue({ - queryChunks: ( - _request: QueryRequest, + searchChunks: ( + _request: SearchChunksRequest, _metadata: Metadata, - callback: (error: ServiceError | null, response: QueryResponse) => void - ) => callback(null, QueryResponse.fromJSON({ relevantChunks: expectedResponse })) + callback: (error: ServiceError | null, response: SearchChunksResponse) => void + ) => callback(null, SearchChunksResponse.fromJSON({ relevantChunks: expectedResponse })) } as unknown as SearchServiceClient); // Create an instance of SearchClient const client = new SearchClient(); // Call the queryChunks method and capture the response - const response = await client.queryChunks({ accessToken, semanticQuery: query, count, filters }); + const response = await client.searchChunksBySemantics({ accessToken, query, count, filters }); // Assert that the response matches the expected response expect(response).toStrictEqual(expectedResponse); }); - it("should get chunks by url", async () => { + it("should search chunks by keyword", async () => { const accessToken = "test-accessToken"; - const url = "https://example.com"; - const expectedResponse: Chunk[] = Array.from({ 
length: 10 }, (_, i) => ({ + const query = "test-query"; + const count = 1; + const filters: Partial = { + scope: ["dataprovider"], + created: { before: new Date() }, + modified: { after: new Date() }, + includeContentInTrash: true + }; + const expectedResponse: RelevantChunk[] = Array.from({ length: count }, (_, i) => ({ source: { system: `system-${i}`, systemVersion: `systemVersion-${i}`, @@ -147,23 +152,26 @@ describe("Service client", () => { createdAt: undefined, link: undefined, modifiedAt: undefined - } + }, + relevance: { + similarityScore: 1.0 + } as RelevantChunk_Relevance })); // Mock the _getClient method of SearchClient to return a mock gRPC client vi.spyOn(SearchClient.prototype, "_getClient").mockReturnValue({ - getChunksByUrl: ( - _request: GetChunksByUrlRequest, + searchChunks: ( + _request: SearchChunksRequest, _metadata: Metadata, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void - ) => callback(null, GetChunksByUrlResponse.fromJSON({ chunks: expectedResponse })) + callback: (error: ServiceError | null, response: SearchChunksResponse) => void + ) => callback(null, SearchChunksResponse.fromJSON({ relevantChunks: expectedResponse })) } as unknown as SearchServiceClient); // Create an instance of SearchClient const client = new SearchClient(); - // Call the getChunksByUrl method and capture the response - const response = await client.getChunksByUrl({ accessToken, url }); + // Call the queryChunks method and capture the response + const response = await client.searchChunksByKeyword({ accessToken, query, count, filters }); // Assert that the response matches the expected response expect(response).toStrictEqual(expectedResponse); From 343a03d0e36a8db7f125202cad4fde5ed7590b4e Mon Sep 17 00:00:00 2001 From: Angus White Date: Mon, 18 Nov 2024 14:18:12 +1100 Subject: [PATCH 04/12] Implement python API --- sdks/python/README.md | 25 +++---- sdks/python/src/redactive/grpc/v2/__init__.py | 2 +- 
.../python/src/redactive/multi_user_client.py | 34 ++------- .../src/redactive/reranking/reranker.py | 11 ++- sdks/python/src/redactive/search_client.py | 75 ++++--------------- .../unit_tests/multi_user_client_tests.py | 47 ++++-------- .../tests/unit_tests/search_client_tests.py | 49 ++++-------- 7 files changed, 67 insertions(+), 176 deletions(-) diff --git a/sdks/python/README.md b/sdks/python/README.md index 34989a8..d8f9980 100644 --- a/sdks/python/README.md +++ b/sdks/python/README.md @@ -56,11 +56,12 @@ response = await client.exchange_tokens(code="OAUTH2-TOKEN") ### SearchClient -With the Redactive access_token, you can perform three types of searches using the Redactive Search service: +With a Redactive access_token, you can perform two types of search -1. **Semantic Query Search**: Retrieve relevant chunks of information that are semantically related to a user query. -2. **URL-based Search**: Obtain all the chunks from a document by specifying its URL. -3. **Document Name Search**: Query for all the chunks from a document based on the name of the document. +#### Query-based Search + +1. **Query-based Search**: Retrieve relevant chunks of information that are related to a user query. +2. **Document Fetch**: Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). 
```python from redactive.search_client import SearchClient @@ -68,21 +69,17 @@ from redactive.search_client import SearchClient client = SearchClient() # Semantic Search: retrieve text extracts (chunks) from various documents pertaining to the user query -client.query_chunks( +client.search_chunks( access_token="REDACTIVE-USER-ACCESS-TOKEN", - semantic_query="Tell me about AI" + query="Tell me about AI" ) +``` +```python # URL-based Search: retrieve all chunks of the document at that URL -client.get_chunks_by_url( - access_token="REDACTIVE-USER-ACCESS-TOKEN", - url="https://example.com/document" -) - -# Document Name Search: retrieve all chunks of a document identified by its name -client.query_chunks_by_document_name( +client.get_document( access_token="REDACTIVE-USER-ACCESS-TOKEN", - document_name="Project Plan" + ref="https://example.com/document" ) ``` diff --git a/sdks/python/src/redactive/grpc/v2/__init__.py b/sdks/python/src/redactive/grpc/v2/__init__.py index 04e63d0..ae76c3c 100644 --- a/sdks/python/src/redactive/grpc/v2/__init__.py +++ b/sdks/python/src/redactive/grpc/v2/__init__.py @@ -172,7 +172,7 @@ class Filters(betterproto.Message): of documents. Subspaces take the form of :/// e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding - Guide' for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared + Guide'. 
For Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' """ diff --git a/sdks/python/src/redactive/multi_user_client.py b/sdks/python/src/redactive/multi_user_client.py index f50d8bd..09ab679 100644 --- a/sdks/python/src/redactive/multi_user_client.py +++ b/sdks/python/src/redactive/multi_user_client.py @@ -7,7 +7,7 @@ import jwt from redactive.auth_client import AuthClient -from redactive.grpc.v1 import Chunk, Filters, RelevantChunk +from redactive.grpc.v2 import Chunk, Filters, RelevantChunk from redactive.search_client import SearchClient @@ -149,16 +149,16 @@ async def _get_id_token(self, user_id: str) -> str: raise InvalidRedactiveSessionError(user_id) return user_data.id_token - async def query_chunks( - self, user_id: str, semantic_query: str, count: int = 10, filters: Filters | dict[str, Any] | None = None + async def search_chunks( + self, user_id: str, query: str, count: int = 10, filters: Filters | dict[str, Any] | None = None ) -> list[RelevantChunk]: """ Query for relevant chunks based on a semantic query. :param user_id: The ID of the user. :type user_id: str - :param semantic_query: The query string used to find relevant chunks. - :type semantic_query: str + :param query: The query string used to find relevant chunks. + :type query: str :param count: The number of relevant chunks to retrieve. Defaults to 10. :type count: int, optional :param filters: The filters for relevant chunks. See `Filters` type. @@ -167,27 +167,9 @@ async def query_chunks( :rtype: list[RelevantChunk] """ id_token = await self._get_id_token(user_id) - return await self.search_client.query_chunks(id_token, semantic_query, count, filters=filters) + return await self.search_client.search_chunks(id_token, query, count, filters=filters) - async def query_chunks_by_document_name( - self, user_id: str, document_name: str, filters: Filters | dict[str, Any] | None = None - ) -> list[Chunk]: - """ - Query for chunks by document name. 
- - :param user_id: The ID of the user. - :type user_id: str - :param document_name: The name of the document to retrieve chunks. - :type document_name: str - :param filters: The filters for querying documents. See `Filters` type. - :type filters: Filters | dict[str, Any], optional - :return: The complete list of chunks for the matching document. - :rtype: list[Chunk] - """ - id_token = await self._get_id_token(user_id) - return await self.search_client.query_chunks_by_document_name(id_token, document_name, filters) - - async def get_chunks_by_url(self, user_id: str, url: str) -> list[Chunk]: + async def get_document(self, user_id: str, ref: str) -> list[Chunk]: """ Get chunks from a document by its URL. @@ -199,4 +181,4 @@ async def get_chunks_by_url(self, user_id: str, url: str) -> list[Chunk]: :rtype: list[Chunk] """ id_token = await self._get_id_token(user_id) - return await self.search_client.get_chunks_by_url(id_token, url) + return await self.search_client.get_document(id_token, ref) diff --git a/sdks/python/src/redactive/reranking/reranker.py b/sdks/python/src/redactive/reranking/reranker.py index 7bc7aa5..269b9b5 100644 --- a/sdks/python/src/redactive/reranking/reranker.py +++ b/sdks/python/src/redactive/reranking/reranker.py @@ -4,7 +4,7 @@ from rerankers import Reranker from redactive import search_client -from redactive.grpc.v1 import Filters, RelevantChunk +from redactive.grpc.v2 import Filters, RelevantChunk @dataclass @@ -32,9 +32,8 @@ def __init__(self, host: str = "grpc.redactive.ai", port: int = 443) -> None: async def query_chunks( self, access_token: str, - semantic_query: str, + query: str, count: int = 3, - query_filter: dict[str, Any] | None = None, filters: Filters | dict[str, Any] | None = None, ) -> list[RelevantChunk]: # Get many more results than the user is asking for, then @@ -43,11 +42,11 @@ async def query_chunks( if big_fetch_count > self.conf.max_fetch_results: big_fetch_count = self.conf.max_fetch_results - fetched_chunks = await 
super().query_chunks( - access_token, semantic_query, big_fetch_count, query_filter, filters + fetched_chunks = await super().search_chunks( + access_token, query, big_fetch_count, filters ) ranker = Reranker(self.conf.reranking_algorithm) - return self.rerank(semantic_query, fetched_chunks, ranker, count) + return self.rerank(query, fetched_chunks, ranker, count) def rerank(self, query_string: str, fetched_chunks: list[RelevantChunk], ranker, top_k): """ diff --git a/sdks/python/src/redactive/search_client.py b/sdks/python/src/redactive/search_client.py index 2955fc4..1bbd291 100644 --- a/sdks/python/src/redactive/search_client.py +++ b/sdks/python/src/redactive/search_client.py @@ -1,18 +1,14 @@ -import warnings from typing import Any -from urllib.parse import urlparse from grpclib.client import Channel from redactive._connection_mode import get_default_grpc_host_and_port as _get_default_grpc_host_and_port from redactive.grpc.v2 import ( Chunk, - DocumentNameQuery, Filters, - GetChunksByUrlRequest, Query, - QueryByDocumentNameRequest, - QueryRequest, + GetDocumentRequest, + SearchChunksRequest, RelevantChunk, SearchStub, ) @@ -40,12 +36,11 @@ def __init__(self, host: str | None = None, port: int | None = None) -> None: self.host = host self.port = port - async def query_chunks( + async def search_chunks( self, access_token: str, - semantic_query: str, + query: str, count: int = 10, - query_filter: dict[str, Any] | None = None, filters: Filters | dict[str, Any] | None = None, ) -> list[RelevantChunk]: """ @@ -53,20 +48,15 @@ async def query_chunks( :param access_token: The user's Redactive access token. :type access_token: str - :param semantic_query: The query string used to find relevant chunks. - :type semantic_query: str + :param query: The query string used to find relevant chunks. + :type query: str :param count: The number of relevant chunks to retrieve. Defaults to 10. :type count: int, optional - :param query_filter: deprecated, use `filters`. 
- :type query_filter: dict[str, Any], optional :param filters: The filters for relevant chunks. See `Filters` type. :type filters: Filters | dict[str, Any], optional :return: A list of relevant chunks that match the query :rtype: list[RelevantChunk] """ - if query_filter is not None: - warnings.warn("`query_filter` has been renamed `filters``", DeprecationWarning, stacklevel=2) - async with Channel(self.host, self.port, ssl=True) as channel: stub = SearchStub(channel, metadata=({"authorization": f"Bearer {access_token}"})) @@ -75,67 +65,30 @@ async def query_chunks( _filters = filters elif isinstance(filters, dict): _filters = Filters(**filters) - elif query_filter is not None: - _filters = Filters(**query_filter) - request = QueryRequest(count=count, query=Query(semantic_query=semantic_query), filters=_filters) - response = await stub.query_chunks(request) + request = SearchChunksRequest(count=count, query=Query(semantic_query=query), filters=_filters) + response = await stub.search_chunks(request) return response.relevant_chunks - async def query_chunks_by_document_name( + async def get_document( self, access_token: str, - document_name: str, - filters: Filters | dict[str, Any] | None = None, + ref: str, ) -> list[Chunk]: """ Query for chunks by document name. :param access_token: The user's Redactive access token. :type access_token: str - :param document_name: The name of the document to retrieve chunks. - :type document_name: str - :param filters: The filters for querying documents. See `Filters` type. - :type filters: Filters | dict[str, Any], optional + :param ref: A reference to the document we are retrieving. + :type ref: str :return: The complete list of chunks for the matching document. 
:rtype: list[Chunk] """ async with Channel(self.host, self.port, ssl=True) as channel: stub = SearchStub(channel, metadata=({"authorization": f"Bearer {access_token}"})) - _filters: Filters | None = None - if isinstance(filters, Filters): - _filters = filters - elif isinstance(filters, dict): - _filters = Filters(**filters) - - request = QueryByDocumentNameRequest(query=DocumentNameQuery(document_name=document_name), filters=_filters) - response = await stub.query_chunks_by_document_name(request) + request = GetDocumentRequest(ref=ref) + response = await stub.get_document(request) return response.chunks - async def get_chunks_by_url( - self, - access_token: str, - url: str, - ) -> list[Chunk]: - """ - Get chunks from a document by its URL. - - :param access_token: The user access token - :type access_token: str - :param url: The URL to the document for retrieving chunks. - :type url: str - :return: The complete list of chunks for the document. - :rtype: list[Chunk] - """ - async with Channel(self.host, self.port, ssl=True) as channel: - stub = SearchStub(channel, metadata=({"authorization": f"Bearer {access_token}"})) - - parsed_url = urlparse(url) - if not all([parsed_url.scheme, parsed_url.netloc]): - msg = "Url is not valid" - raise ValueError(msg) - - request = GetChunksByUrlRequest(url=url) - response = await stub.get_chunks_by_url(request) - return response.chunks diff --git a/sdks/python/tests/unit_tests/multi_user_client_tests.py b/sdks/python/tests/unit_tests/multi_user_client_tests.py index 99ead79..1df2dd9 100644 --- a/sdks/python/tests/unit_tests/multi_user_client_tests.py +++ b/sdks/python/tests/unit_tests/multi_user_client_tests.py @@ -5,7 +5,7 @@ import pytest from redactive.auth_client import AuthClient -from redactive.grpc.v1 import Chunk, RelevantChunk +from redactive.grpc.v2 import Chunk, RelevantChunk from redactive.multi_user_client import MultiUserClient, UserData from redactive.search_client import SearchClient @@ -44,8 +44,8 @@ def 
test_multi_user_client_initialization() -> None: callback_uri = "http://callback.uri" read_user_data = mock.Mock() write_user_data = mock.Mock() - auth_base_url = ("http://auth.base.url",) - grpc_host = ("grpc.host",) + auth_base_url = "http://auth.base.url" + grpc_host = "grpc.host" grpc_port = 443 multi_user_client = MultiUserClient( @@ -85,60 +85,39 @@ def test_multi_user_client_initialization_with_no_options() -> None: @pytest.mark.asyncio -async def test_query_chunks(multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock) -> None: +async def test_search_chunks(multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock) -> None: user_id = "user123" - semantic_query = "example query" + query = "example query" count = 5 filters = {"key": "value"} relevant_chunks = [mock.Mock(spec=RelevantChunk) for _ in range(count)] multi_user_client.search_client = mock_search_client - multi_user_client.search_client.query_chunks.return_value = relevant_chunks + multi_user_client.search_client.search_chunks.return_value = relevant_chunks multi_user_client.read_user_data.side_effect = mock_read_user_data - result = await multi_user_client.query_chunks(user_id, semantic_query, count, filters=filters) + result = await multi_user_client.search_chunks(user_id, query, count, filters=filters) assert result == relevant_chunks - multi_user_client.search_client.query_chunks.assert_called_with( - "idToken123", semantic_query, count, filters=filters + multi_user_client.search_client.search_chunks.assert_called_with( + "idToken123", query, count, filters=filters ) @pytest.mark.asyncio -async def test_query_chunks_by_document_name( - multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock -) -> None: - user_id = "user123" - document_name = "example_document" - filters = {"key": "value"} - chunks = [mock.Mock(spec=Chunk) for _ in range(3)] - - multi_user_client.search_client = mock_search_client - 
multi_user_client.search_client.query_chunks_by_document_name.return_value = chunks - multi_user_client.read_user_data.side_effect = mock_read_user_data - - result = await multi_user_client.query_chunks_by_document_name(user_id, document_name, filters) - - assert result == chunks - multi_user_client.search_client.query_chunks_by_document_name.assert_called_with( - "idToken123", document_name, filters - ) - - -@pytest.mark.asyncio -async def test_get_chunks_by_url(multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock) -> None: +async def test_get_document_by_url(multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock) -> None: user_id = "user123" url = "http://example.com" chunks = [mock.Mock() for _ in range(3)] multi_user_client.search_client = mock_search_client - multi_user_client.search_client.get_chunks_by_url.return_value = chunks + multi_user_client.search_client.get_document.return_value = chunks multi_user_client.read_user_data.side_effect = mock_read_user_data - result = await multi_user_client.get_chunks_by_url(user_id, url) + result = await multi_user_client.get_document(user_id, url) assert result == chunks - multi_user_client.search_client.get_chunks_by_url.assert_called_with("idToken123", url) + multi_user_client.search_client.get_document.assert_called_with("idToken123", url) async def test_get_begin_connection_url(multi_user_client: MultiUserClient, mock_auth_client: mock.AsyncMock) -> None: diff --git a/sdks/python/tests/unit_tests/search_client_tests.py b/sdks/python/tests/unit_tests/search_client_tests.py index 35be4c8..d6cbe8b 100644 --- a/sdks/python/tests/unit_tests/search_client_tests.py +++ b/sdks/python/tests/unit_tests/search_client_tests.py @@ -2,7 +2,6 @@ import pytest -from redactive.grpc.v1 import DocumentNameQuery, Filters, QueryByDocumentNameRequest from redactive.search_client import SearchClient @@ -16,20 +15,20 @@ def test_init_client(): @mock.patch("grpclib.client.Channel") @pytest.mark.asyncio 
async def test_query_chunks(mock_channel_context): - from redactive.grpc.v1 import Query, QueryRequest + from redactive.grpc.v2 import Query, SearchChunksRequest access_token = "test-access_token" - semantic_query = "Tell me about somethings" + query = "Tell me about somethings" count = 1 mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() - with mock.patch("redactive.grpc.v1.SearchStub.query_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: + with mock.patch("redactive.grpc.v2.SearchStub.search_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: client = SearchClient() - await client.query_chunks(access_token, semantic_query, count) + await client.search_chunks(access_token, query, count) mock_query_chunks.assert_called_once_with( - QueryRequest( + SearchChunksRequest( count=count, - query=Query(semantic_query), + query=Query(query), ) ) @@ -37,52 +36,34 @@ async def test_query_chunks(mock_channel_context): @mock.patch("grpclib.client.Channel") @pytest.mark.asyncio async def test_query_chunks_with_filter(mock_channel_context): - from redactive.grpc.v1 import Query, QueryRequest + from redactive.grpc.v2 import Query, SearchChunksRequest, Filters access_token = "test-access_token" - semantic_query = "Tell me about somethings" + query = "Tell me about somethings" count = 1 filters = {"scope": "mock.scope"} mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() - with mock.patch("redactive.grpc.v1.SearchStub.query_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: + with mock.patch("redactive.grpc.v2.SearchStub.search_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: client = SearchClient() - await client.query_chunks(access_token, semantic_query, count, filters) + await client.search_chunks(access_token, query, count, filters) mock_query_chunks.assert_called_once_with( - QueryRequest(count=count, query=Query(semantic_query), filters=Filters(**filters)) - ) - - 
-@mock.patch("grpclib.client.Channel") -@pytest.mark.asyncio -async def test_query_chunks_by_document_name_with_filter(mock_channel_context): - access_token = "test-access_token" - document_name = "document_name" - filters = {"scope": "mock.scope"} - mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() - - with mock.patch( - "redactive.grpc.v1.SearchStub.query_chunks_by_document_name", side_effect=mock.AsyncMock() - ) as mock_query_chunks_by_document_name: - client = SearchClient() - await client.query_chunks_by_document_name(access_token, document_name, filters) - mock_query_chunks_by_document_name.assert_called_once_with( - QueryByDocumentNameRequest(query=DocumentNameQuery(document_name), filters=Filters(**filters)) + SearchChunksRequest(count=count, query=Query(query), filters=Filters(**filters)) ) @mock.patch("grpclib.client.Channel") @pytest.mark.asyncio async def test_get_chunks_by_url(mock_channel_context): - from redactive.grpc.v1 import GetChunksByUrlRequest + from redactive.grpc.v2 import GetDocumentRequest access_token = "test-access_token" url = "https://example.com" mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() with mock.patch( - "redactive.grpc.v1.SearchStub.get_chunks_by_url", side_effect=mock.AsyncMock() + "redactive.grpc.v2.SearchStub.get_document", side_effect=mock.AsyncMock() ) as mock_get_chunks_by_url: client = SearchClient() - await client.get_chunks_by_url(access_token, url) - mock_get_chunks_by_url.assert_called_once_with(GetChunksByUrlRequest(url=url)) + await client.get_document(access_token, url) + mock_get_chunks_by_url.assert_called_once_with(GetDocumentRequest(ref=url)) From 826c988f6d1f2a581239ab0d3527f217e824bd23 Mon Sep 17 00:00:00 2001 From: Angus White Date: Mon, 18 Nov 2024 14:32:27 +1100 Subject: [PATCH 05/12] Update doco for Python SDK --- sdks/python/README.md | 48 +++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git 
a/sdks/python/README.md b/sdks/python/README.md index d8f9980..81d5faf 100644 --- a/sdks/python/README.md +++ b/sdks/python/README.md @@ -60,8 +60,7 @@ With a Redactive access_token, you can perform two types of search #### Query-based Search -1. **Query-based Search**: Retrieve relevant chunks of information that are related to a user query. -2. **Document Fetch**: Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). +Retrieve relevant chunks of information that are related to a user query. ```python from redactive.search_client import SearchClient @@ -75,17 +74,27 @@ client.search_chunks( ) ``` -```python -# URL-based Search: retrieve all chunks of the document at that URL -client.get_document( - access_token="REDACTIVE-USER-ACCESS-TOKEN", - ref="https://example.com/document" -) +**Filters** may be applied to query-based search operations. At present, the following fields may be provided as filter predicates: + +```protobuf +message Filters { + // Scope of the query. This may either be the name of a provider, or a subspace of documents. + // Subspaces take the form of <provider>://<tenancy>/<path> + // e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + // for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' + repeated string scope = 1; + // Timespan of response chunk's creation + optional TimeSpan created = 2; + // Timespan of response chunk's last modification + optional TimeSpan modified = 3; + // List of user emails associated with response chunk + repeated string user_emails = 4; + // Include content from documents in trash + optional bool include_content_in_trash = 5; +} ``` -### Filters - -Query methods, i.e. `query_chunks`, `query_chunks_by_document_name`, support a set of optional filters. The filters are applied in a logical 'AND' operation. 
If a data source provider does not support a filter-type, then no results from that provider are returned. +Filters may be populated and provided to a query in the following way for the Python SDK: ```python from datetime import datetime, timedelta @@ -115,6 +124,19 @@ client.query_chunks( ) ``` + +#### Document Fetch + +Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). + +```python +# URL-based Search: retrieve all chunks of the document at that URL +client.get_document( + access_token="REDACTIVE-USER-ACCESS-TOKEN", + ref="https://example.com/document" +) +``` + ### Multi-User Client The `MultiUserClient` class helps manage multiple users' authentication and access to the Redactive search service. @@ -142,8 +164,8 @@ is_connection_successful = await multi_user_client.handle_connection_callback( ) # User can now use Redactive search service via `MultiUserClient`'s other methods: -semantic_query = "Tell me about the missing research vessel, the Borealis" -chunks = await multi_user_client.query_chunks(user_id=user_id, semantic_query=semantic_query) +query = "Tell me about the missing research vessel, the Borealis" +chunks = await multi_user_client.search_chunks(user_id=user_id, query=query) ``` ## Development From 9233843fe4d9b60150431b90c0f5a14f597e8ee1 Mon Sep 17 00:00:00 2001 From: Angus White Date: Mon, 18 Nov 2024 14:35:27 +1100 Subject: [PATCH 06/12] Reduce scope of npm SDK --- sdks/node/src/multiUserClient.ts | 24 +----------- sdks/node/src/searchClient.test.ts | 59 +----------------------------- sdks/node/src/searchClient.ts | 42 +-------------------- 3 files changed, 5 insertions(+), 120 deletions(-) diff --git a/sdks/node/src/multiUserClient.ts b/sdks/node/src/multiUserClient.ts index 75df4f2..6b4c8bb 100644 --- a/sdks/node/src/multiUserClient.ts +++ b/sdks/node/src/multiUserClient.ts @@ -155,7 +155,7 @@ export class MultiUserClient { * @param filters - An object of filters for querying. Optional. 
* @returns list of relevant chunks. */ - async searchChunksBySemantics({ userId, query, count = 10, filters }: MultiUserSearchChunksParams): Promise { + async searchChunks({ userId, query, count = 10, filters }: MultiUserSearchChunksParams): Promise { let userData = await this.readUserData(userId); if (!userData || !userData.refreshToken) { throw new Error(`No valid Redactive session for user '${userId}'`); @@ -164,27 +164,7 @@ export class MultiUserClient { userData = await this._refreshUserData(userId, userData.refreshToken, undefined); } - return await this.searchClient.searchChunksBySemantics({ accessToken: userData.idToken!, query, count, filters }); - } - - /** - * Query for relevant chunks containing the provided keywords - * @param userId - The ID of the user. - * @param query - The query string used to find relevant chunks. - * @param count - The number of relevant chunks to retrieve. Defaults to 10. - * @param filters - An object of filters for querying. Optional. - * @returns list of relevant chunks. 
- */ - async searchChunksByKeyword({ userId, query, count = 10, filters }: MultiUserSearchChunksParams): Promise { - let userData = await this.readUserData(userId); - if (!userData || !userData.refreshToken) { - throw new Error(`No valid Redactive session for user '${userId}'`); - } - if (!!userData.idTokenExpiry && new Date(userData.idTokenExpiry) < new Date()) { - userData = await this._refreshUserData(userId, userData.refreshToken, undefined); - } - - return await this.searchClient.searchChunksByKeyword({ accessToken: userData.idToken!, query, count, filters }); + return await this.searchClient.searchChunks({ accessToken: userData.idToken!, query, count, filters }); } /** diff --git a/sdks/node/src/searchClient.test.ts b/sdks/node/src/searchClient.test.ts index b453727..a8dbe39 100644 --- a/sdks/node/src/searchClient.test.ts +++ b/sdks/node/src/searchClient.test.ts @@ -67,7 +67,7 @@ describe("Service client", () => { expect(response).toStrictEqual(expectedResponse); }); - it("should search chunks semantically", async () => { + it("should search chunks", async () => { const accessToken = "test-accessToken"; const query = "test-query"; const count = 1; @@ -116,62 +116,7 @@ describe("Service client", () => { const client = new SearchClient(); // Call the queryChunks method and capture the response - const response = await client.searchChunksBySemantics({ accessToken, query, count, filters }); - - // Assert that the response matches the expected response - expect(response).toStrictEqual(expectedResponse); - }); - - it("should search chunks by keyword", async () => { - const accessToken = "test-accessToken"; - const query = "test-query"; - const count = 1; - const filters: Partial = { - scope: ["dataprovider"], - created: { before: new Date() }, - modified: { after: new Date() }, - includeContentInTrash: true - }; - const expectedResponse: RelevantChunk[] = Array.from({ length: count }, (_, i) => ({ - source: { - system: `system-${i}`, - systemVersion: 
`systemVersion-${i}`, - documentId: `documentId-${i}`, - documentVersion: `documentVersion-${i}`, - connectionId: `connectionId-${i}`, - documentName: `documentName-${i}`, - documentPath: `documentPath-${i}` - } as SourceReference, - chunk: { - chunkHash: `chunkHash-${i}`, - chunkId: `chunkId-${i}`, - chunkingVersion: `chunkingVersion-${i}` - } as ChunkReference, - chunkBody: `chunkBody-${i}`, - documentMetadata: { - createdAt: undefined, - link: undefined, - modifiedAt: undefined - }, - relevance: { - similarityScore: 1.0 - } as RelevantChunk_Relevance - })); - - // Mock the _getClient method of SearchClient to return a mock gRPC client - vi.spyOn(SearchClient.prototype, "_getClient").mockReturnValue({ - searchChunks: ( - _request: SearchChunksRequest, - _metadata: Metadata, - callback: (error: ServiceError | null, response: SearchChunksResponse) => void - ) => callback(null, SearchChunksResponse.fromJSON({ relevantChunks: expectedResponse })) - } as unknown as SearchServiceClient); - - // Create an instance of SearchClient - const client = new SearchClient(); - - // Call the queryChunks method and capture the response - const response = await client.searchChunksByKeyword({ accessToken, query, count, filters }); + const response = await client.searchChunks({ accessToken, query, count, filters }); // Assert that the response matches the expected response expect(response).toStrictEqual(expectedResponse); diff --git a/sdks/node/src/searchClient.ts b/sdks/node/src/searchClient.ts index 5584380..2e68974 100644 --- a/sdks/node/src/searchClient.ts +++ b/sdks/node/src/searchClient.ts @@ -60,7 +60,7 @@ export class SearchClient { * @param filters - An object of filters for querying. Optional. * @returns list of relevant chunks. */ - async searchChunksBySemantics({ + async searchChunks({ accessToken, query, count = 10, @@ -92,46 +92,6 @@ export class SearchClient { return response.relevantChunks; } - /** - * Query for relevant chunks based on keywords. 
- * @param accessToken - The user's Redactive access token. - * @param query - The query string used to find relevant chunks. - * @param count - The number of relevant chunks to retrieve. Defaults to 10. - * @param filters - An object of filters for querying. Optional. - * @returns list of relevant chunks. - */ - async searchChunksByKeyword({ - accessToken, - query, - count = 10, - filters - }: SearchChunksParams): Promise { - const requestMetadata = new Metadata(); - requestMetadata.set("Authorization", `Bearer ${accessToken}`); - requestMetadata.set("User-Agent", "redactive-sdk-node"); - - const client = this._getClient(SearchServiceClient.serviceName) as SearchServiceClient; - const query_obj: Query = { keywordQuery: query }; - const _filters: Filters = { scope: [], userEmails: [], ...filters }; - const searchRequest: SearchChunksRequest = { - query: query_obj, - count, - filters: filters ? _filters : undefined - }; - - const response = await new Promise((resolve, reject) => { - client.searchChunks(searchRequest, requestMetadata, (err, response) => { - if (err) { - reject(err); - return; - } - - return resolve(response); - }); - }); - return response.relevantChunks; - } - /** * Get chunks for a document via a specific reference * @param accessToken - The user's Redactive access token. 
From 4682082d8c8ff6e215c7db3f87635cabac7d40a7 Mon Sep 17 00:00:00 2001 From: Angus White Date: Mon, 18 Nov 2024 14:36:42 +1100 Subject: [PATCH 07/12] Fix failing test --- sdks/node/src/multiUserClient.test.ts | 39 ++++----------------------- 1 file changed, 5 insertions(+), 34 deletions(-) diff --git a/sdks/node/src/multiUserClient.test.ts b/sdks/node/src/multiUserClient.test.ts index dc57963..c4f734f 100644 --- a/sdks/node/src/multiUserClient.test.ts +++ b/sdks/node/src/multiUserClient.test.ts @@ -141,12 +141,12 @@ describe("MultiUserClient", () => { readUserData.mockResolvedValue(undefined); - await expect(multiUserClient.searchChunksBySemantics({ userId, query })).rejects.toThrow( + await expect(multiUserClient.searchChunks({ userId, query })).rejects.toThrow( `No valid Redactive session for user '${userId}'` ); }); - it("should query chunks semantically after refreshing idToken", async () => { + it("should query chunks after refreshing idToken", async () => { const userId = "user123"; const query = "query"; const idToken = "idToken123"; @@ -166,42 +166,13 @@ describe("MultiUserClient", () => { readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.searchChunksBySemantics.mockResolvedValue(chunks as unknown as RelevantChunk[]); + mockSearchClient.searchChunks.mockResolvedValue(chunks as unknown as RelevantChunk[]); multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.searchChunksBySemantics({ userId, query }); + const result = await multiUserClient.searchChunks({ userId, query }); expect(result).toEqual(chunks); - expect(mockSearchClient.searchChunksBySemantics).toHaveBeenCalledWith({ accessToken: idToken, query, count: 10 }); - }); - - it("should query chunks by keyword after refreshing idToken", async () => { - const userId = "user123"; - const query = "query"; - const idToken = 
"idToken123"; - const refreshToken = "refreshToken123"; - const chunks = [{ chunk: "chunk1" }, { chunk: "chunk2" }]; - - const expiredUserData: UserData = { - idToken, - idTokenExpiry: new Date(Date.now() - 1000), - refreshToken - }; - const refreshedUserData: UserData = { - idToken, - idTokenExpiry: new Date(Date.now() + 3600 * 1000), - refreshToken - }; - - readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); - multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.searchChunksByKeyword.mockResolvedValue(chunks as unknown as RelevantChunk[]); - - multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.searchChunksByKeyword({ userId, query }); - - expect(result).toEqual(chunks); - expect(mockSearchClient.searchChunksByKeyword).toHaveBeenCalledWith({ accessToken: idToken, query, count: 10 }); + expect(mockSearchClient.searchChunks).toHaveBeenCalledWith({ accessToken: idToken, query, count: 10 }); }); it("should query chunks by document ref after refreshing idToken", async () => { From dd797b60fe05fca0cba6135403d5c2edf395533b Mon Sep 17 00:00:00 2001 From: Angus White Date: Mon, 18 Nov 2024 14:50:04 +1100 Subject: [PATCH 08/12] Fix Node doco --- sdks/node/README.md | 71 +++++++++++++++++++++++++++++++------------ sdks/python/README.md | 12 +++++--- 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/sdks/node/README.md b/sdks/node/README.md index 4cec228..2c51945 100644 --- a/sdks/node/README.md +++ b/sdks/node/README.md @@ -52,11 +52,11 @@ const response = await client.exchangeTokens("OAUTH2-AUTH-CODE"); ### SearchClient -With the Redactive access_token, you can perform three types of searches using the Redactive Search service: +With the Redactive `access_token`, you can perform two types of search -1. **Semantic Query Search**: Retrieve relevant chunks of information that are semantically related to a user query. -2. 
**URL-based Search**: Obtain all the chunks from a document by specifying its URL. -3. **Document Name Search**: Query for all the chunks from a document based on the name of the document. +#### Query-based Search + +Retrieve relevant chunks of information that are related to a user query. ```javascript import { SearchClient } from "@redactive/redactive"; @@ -64,24 +64,38 @@ import { SearchClient } from "@redactive/redactive"; const client = new SearchClient(); const accessToken = "REDACTIVE-ACCESS-TOKEN"; -// Semantic Search: retrieve text extracts (chunks) from various documents pertaining to the user query -const semanticQuery = "Tell me about AI"; -await client.queryChunks({ accessToken, semanticQuery }); - -// URL-based Search: retrieve all chunks of the document at that URL -const url = "https://example.com/document"; -await client.getChunksByUrl({ accessToken, url }); +// Query-based Search: retrieve text extracts (chunks) from various documents pertaining to the user query +const query = "Tell me about AI"; +await client.searchChunks({ accessToken, query }); +``` -// Document Name Search : retrieve all chunks of a document identified by its name -const documentName = "AI Research Paper"; -await client.queryChunksByDocumentName({ accessToken, documentName }); +**Filters** may be applied to query-based search operations. At present, the following fields may be provided as filter predicates: + +```protobuf +message Filters { + // Scope of the query. This may either be the name of a provider, or a subspace of documents. + // Subspaces take the form of :/// + // e.g. 
for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + // for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' + repeated string scope = 1; + // Timespan of response chunk's creation + optional TimeSpan created = 2; + // Timespan of response chunk's last modification + optional TimeSpan modified = 3; + // List of user emails associated with response chunk + repeated string user_emails = 4; + // Include content from documents in trash + optional bool include_content_in_trash = 5; +} ``` -### Filters +The query will only return results which match _ALL_ filter predicates i.e. if multiple fields are populated in the filter object, +the resulting filter is the logical 'AND' of all the fields. If a data source provider does not support a filter-type, then no +results from that provider are returned. -Query methods, i.e. `queryChunks`, `queryChunksByDocumentName`, support a set of optional filters. The filters are applied in a logical 'AND' operation. If a data source provider does not support a filter-type, then no results from that provider are returned. +Filters may be populated and provided to a query in the following way for the NodeJS SDK: -```typescript +```javascript import { Filters } from "@redactive/redactive/grpc/search"; // Query chunks from Confluence only, that are from documents created before last week, modified since last week, @@ -98,7 +112,24 @@ const filters: Filters = { userEmails: ["myEmail@example.com"], includeContentInTrash: true }; -await client.queryChunks({ accessToken, semanticQuery, filters }); +await client.searchChunks({ accessToken, query, filters }); + +``` + +#### Document Fetch + +Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). 
+ + +```javascript +import { SearchClient } from "@redactive/redactive"; + +const client = new SearchClient(); +const accessToken = "REDACTIVE-ACCESS-TOKEN"; + +// URL-based Search: retrieve all chunks of the document at that URL +const url = "https://example.com/document"; +await client.getDocument({ accessToken, url }); ``` ### Multi-User Client @@ -124,8 +155,8 @@ let [signInCode, state] = ["", ""]; // from URL query parameters const isConnectionSuccessful = await multiUserClient.handleConnectionCallback(userId, signInCode, state); // User can now use Redactive search service via `MultiUserClient`'s other methods: -const semanticQuery = "Tell me about the missing research vessel, the Borealis"; -const chunks = await multiUserClient.queryChunks({ userId, semanticQuery }); +const query = "Tell me about the missing research vessel, the Borealis"; +const chunks = await multiUserClient.searchChunks({ userId, query }); ``` ## Development diff --git a/sdks/python/README.md b/sdks/python/README.md index 81d5faf..615713e 100644 --- a/sdks/python/README.md +++ b/sdks/python/README.md @@ -56,7 +56,7 @@ response = await client.exchange_tokens(code="OAUTH2-TOKEN") ### SearchClient -With a Redactive access_token, you can perform two types of search +With a Redactive `access_token`, you can perform two types of search #### Query-based Search @@ -94,12 +94,16 @@ message Filters { } ``` +The query will only return results which match _ALL_ filter predicates i.e. if multiple fields are populated in the filter object, +the resulting filter is the logical 'AND' of all the fields. If a data source provider does not support a filter-type, then no +results from that provider are returned. 
+ Filters may be populated and provided to a query in the following way for the Python SDK: ```python from datetime import datetime, timedelta from redactive.search_client import SearchClient -from redactive.grpc.v1 import Filters +from redactive.grpc.v2 import Filters client = SearchClient() @@ -117,7 +121,7 @@ filters = Filters().from_dict({ "userEmails": ["myEmail@example.com"], "includeContentInTrash": True, }) -client.query_chunks( +client.search_chunks( access_token="REDACTIVE-USER-ACCESS-TOKEN", semantic_query="Tell me about AI", filters=filters @@ -127,7 +131,7 @@ client.query_chunks( #### Document Fetch -Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). +Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). ```python # URL-based Search: retrieve all chunks of the document at that URL From de989bb57b7091f0933829807c5794df0de9f6b9 Mon Sep 17 00:00:00 2001 From: Angus White Date: Mon, 18 Nov 2024 14:58:50 +1100 Subject: [PATCH 09/12] Update comments in protos --- protos/chunks.proto | 10 ++++---- sdks/node/src/grpc/chunks.ts | 10 ++++---- sdks/python/src/redactive/grpc/v2/__init__.py | 23 ++++++++----------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/protos/chunks.proto b/protos/chunks.proto index f223946..f9aba58 100644 --- a/protos/chunks.proto +++ b/protos/chunks.proto @@ -14,15 +14,15 @@ message ChunkMetadata { } message SourceReference { - // Source system of the document e.g. confluence, slack, local_file_system + // Source system of the document e.g. confluence, sharepoint string system = 1; // Version of the source system e.g. 1.0.0 string system_version = 2; - // Connection id to the source system e.g. confluence space id, slack channel id, local file hostname + // Connection id to the source system e.g. confluence space id, sharepoint drive id string connection_id = 3; - // Document id in the source system e.g. 
confluence page id, slack message id, local file path + // Document id in the source system e.g. confluence page id, sharepoint file id string document_id = 4; - // Document version in the source system e.g. confluence page version, slack message version, local file version hash + // Document version in the source system e.g. confluence page version, sharepoint file hash string document_version = 5; // Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide" // or "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf" @@ -34,7 +34,7 @@ message SourceReference { message ChunkReference { // Chunking version e.g. 1.0.0 string chunking_version = 1; - // chunk id is unique within the document, but not globally unique, it's actually the index of the chunk in the document + // chunk id is unique within the document, but not globally unique. string chunk_id = 2; // SHA256 hash of the chunk body string chunk_hash = 3; diff --git a/sdks/node/src/grpc/chunks.ts b/sdks/node/src/grpc/chunks.ts index 8cb3628..79f9669 100644 --- a/sdks/node/src/grpc/chunks.ts +++ b/sdks/node/src/grpc/chunks.ts @@ -21,15 +21,15 @@ export interface ChunkMetadata { } export interface SourceReference { - /** Source system of the document e.g. confluence, slack, local_file_system */ + /** Source system of the document e.g. confluence, sharepoint */ system: string; /** Version of the source system e.g. 1.0.0 */ systemVersion: string; - /** Connection id to the source system e.g. confluence space id, slack channel id, local file hostname */ + /** Connection id to the source system e.g. confluence space id, sharepoint drive id */ connectionId: string; - /** Document id in the source system e.g. confluence page id, slack message id, local file path */ + /** Document id in the source system e.g. confluence page id, sharepoint file id */ documentId: string; - /** Document version in the source system e.g. 
confluence page version, slack message version, local file version hash */ + /** Document version in the source system e.g. confluence page version, sharepoint file hash */ documentVersion: string; /** * Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide" @@ -45,7 +45,7 @@ export interface SourceReference { export interface ChunkReference { /** Chunking version e.g. 1.0.0 */ chunkingVersion: string; - /** chunk id is unique within the document, but not globally unique, it's actually the index of the chunk in the document */ + /** chunk id is unique within the document, but not globally unique. */ chunkId: string; /** SHA256 hash of the chunk body */ chunkHash: string; diff --git a/sdks/python/src/redactive/grpc/v2/__init__.py b/sdks/python/src/redactive/grpc/v2/__init__.py index ae76c3c..cdaedc9 100644 --- a/sdks/python/src/redactive/grpc/v2/__init__.py +++ b/sdks/python/src/redactive/grpc/v2/__init__.py @@ -42,29 +42,27 @@ class ChunkMetadata(betterproto.Message): @dataclass(eq=False, repr=False) class SourceReference(betterproto.Message): system: str = betterproto.string_field(1) - """ - Source system of the document e.g. confluence, slack, local_file_system - """ + """Source system of the document e.g. confluence, sharepoint""" system_version: str = betterproto.string_field(2) """Version of the source system e.g. 1.0.0""" connection_id: str = betterproto.string_field(3) """ - Connection id to the source system e.g. confluence space id, slack channel - id, local file hostname + Connection id to the source system e.g. confluence space id, sharepoint + drive id """ document_id: str = betterproto.string_field(4) """ - Document id in the source system e.g. confluence page id, slack message id, - local file path + Document id in the source system e.g. confluence page id, sharepoint file + id """ document_version: str = betterproto.string_field(5) """ - Document version in the source system e.g. 
confluence page version, slack - message version, local file version hash + Document version in the source system e.g. confluence page version, + sharepoint file hash """ document_path: Optional[str] = betterproto.string_field( @@ -89,10 +87,7 @@ class ChunkReference(betterproto.Message): """Chunking version e.g. 1.0.0""" chunk_id: str = betterproto.string_field(2) - """ - chunk id is unique within the document, but not globally unique, it's - actually the index of the chunk in the document - """ + """chunk id is unique within the document, but not globally unique.""" chunk_hash: str = betterproto.string_field(3) """SHA256 hash of the chunk body""" @@ -172,7 +167,7 @@ class Filters(betterproto.Message): of documents. Subspaces take the form of :/// e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding - Guide'. For Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared + Guide' for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' """ From fab38088931376ad32d74c830598c6c80add8d15 Mon Sep 17 00:00:00 2001 From: Angus White Date: Mon, 18 Nov 2024 15:06:23 +1100 Subject: [PATCH 10/12] Format codebase --- sdks/node/README.md | 5 +- sdks/node/src/grpc/chunks.ts | 111 ++++++++-------- sdks/node/src/grpc/google/protobuf/struct.ts | 63 +++++---- .../src/grpc/google/protobuf/timestamp.ts | 21 +-- sdks/node/src/grpc/search.ts | 122 ++++++++---------- sdks/node/src/multiUserClient.test.ts | 1 - sdks/node/src/multiUserClient.ts | 8 +- sdks/node/src/searchClient.ts | 13 +- sdks/python/src/redactive/grpc/v2/__init__.py | 70 +++------- .../src/redactive/reranking/reranker.py | 4 +- sdks/python/src/redactive/search_client.py | 5 +- .../unit_tests/multi_user_client_tests.py | 6 +- .../tests/unit_tests/search_client_tests.py | 2 +- 13 files changed, 180 insertions(+), 251 deletions(-) diff --git a/sdks/node/README.md b/sdks/node/README.md index 2c51945..4281ac2 100644 --- 
a/sdks/node/README.md +++ b/sdks/node/README.md @@ -89,8 +89,8 @@ message Filters { } ``` -The query will only return results which match _ALL_ filter predicates i.e. if multiple fields are populated in the filter object, -the resulting filter is the logical 'AND' of all the fields. If a data source provider does not support a filter-type, then no +The query will only return results which match _ALL_ filter predicates i.e. if multiple fields are populated in the filter object, +the resulting filter is the logical 'AND' of all the fields. If a data source provider does not support a filter-type, then no results from that provider are returned. Filters may be populated and provided to a query in the following way for the NodeJS SDK: @@ -120,7 +120,6 @@ await client.searchChunks({ accessToken, semanticQuery, filters }); Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). - ```javascript import { SearchClient } from "@redactive/redactive"; diff --git a/sdks/node/src/grpc/chunks.ts b/sdks/node/src/grpc/chunks.ts index 79f9669..71f1153 100644 --- a/sdks/node/src/grpc/chunks.ts +++ b/sdks/node/src/grpc/chunks.ts @@ -6,15 +6,14 @@ /* eslint-disable */ import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire"; + import { Timestamp } from "./google/protobuf/timestamp"; export const protobufPackage = "redactive.grpc.v2"; export interface ChunkMetadata { /** Chunk content's creation timestamp */ - createdAt?: - | Date - | undefined; + createdAt?: Date | undefined; /** Chunk content's last modified timestamp */ modifiedAt?: Date | undefined; link?: string | undefined; @@ -35,9 +34,7 @@ export interface SourceReference { * Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide" * or "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf" */ - documentPath?: - | string - | undefined; + documentPath?: string | undefined; /** Document name in the source system e.g. 
"document.txt" */ documentName?: string | undefined; } @@ -54,17 +51,11 @@ export interface ChunkReference { /** A chunk is a part of a document */ export interface RelevantChunk { /** Source reference of the document */ - source: - | SourceReference - | undefined; + source: SourceReference | undefined; /** Chunk reference of the chunk */ - chunk: - | ChunkReference - | undefined; + chunk: ChunkReference | undefined; /** Relevance of the chunk */ - relevance: - | RelevantChunk_Relevance - | undefined; + relevance: RelevantChunk_Relevance | undefined; /** Chunk body */ chunkBody: string; /** Document metadata */ @@ -79,13 +70,9 @@ export interface RelevantChunk_Relevance { /** A chunk is a part of a document */ export interface Chunk { /** Source reference of the document */ - source: - | SourceReference - | undefined; + source: SourceReference | undefined; /** Chunk reference of the chunk */ - chunk: - | ChunkReference - | undefined; + chunk: ChunkReference | undefined; /** Chunk body */ chunkBody: string; /** Document metadata */ @@ -154,7 +141,7 @@ export const ChunkMetadata: MessageFns = { return { createdAt: isSet(object.createdAt) ? fromJsonTimestamp(object.createdAt) : undefined, modifiedAt: isSet(object.modifiedAt) ? fromJsonTimestamp(object.modifiedAt) : undefined, - link: isSet(object.link) ? globalThis.String(object.link) : undefined, + link: isSet(object.link) ? globalThis.String(object.link) : undefined }; }, @@ -181,7 +168,7 @@ export const ChunkMetadata: MessageFns = { message.modifiedAt = object.modifiedAt ?? undefined; message.link = object.link ?? undefined; return message; - }, + } }; function createBaseSourceReference(): SourceReference { @@ -192,7 +179,7 @@ function createBaseSourceReference(): SourceReference { documentId: "", documentVersion: "", documentPath: undefined, - documentName: undefined, + documentName: undefined }; } @@ -302,7 +289,7 @@ export const SourceReference: MessageFns = { documentId: isSet(object.documentId) ? 
globalThis.String(object.documentId) : "", documentVersion: isSet(object.documentVersion) ? globalThis.String(object.documentVersion) : "", documentPath: isSet(object.documentPath) ? globalThis.String(object.documentPath) : undefined, - documentName: isSet(object.documentName) ? globalThis.String(object.documentName) : undefined, + documentName: isSet(object.documentName) ? globalThis.String(object.documentName) : undefined }; }, @@ -345,7 +332,7 @@ export const SourceReference: MessageFns = { message.documentPath = object.documentPath ?? undefined; message.documentName = object.documentName ?? undefined; return message; - }, + } }; function createBaseChunkReference(): ChunkReference { @@ -410,7 +397,7 @@ export const ChunkReference: MessageFns = { return { chunkingVersion: isSet(object.chunkingVersion) ? globalThis.String(object.chunkingVersion) : "", chunkId: isSet(object.chunkId) ? globalThis.String(object.chunkId) : "", - chunkHash: isSet(object.chunkHash) ? globalThis.String(object.chunkHash) : "", + chunkHash: isSet(object.chunkHash) ? globalThis.String(object.chunkHash) : "" }; }, @@ -437,7 +424,7 @@ export const ChunkReference: MessageFns = { message.chunkId = object.chunkId ?? ""; message.chunkHash = object.chunkHash ?? ""; return message; - }, + } }; function createBaseRelevantChunk(): RelevantChunk { @@ -526,7 +513,7 @@ export const RelevantChunk: MessageFns = { chunk: isSet(object.chunk) ? ChunkReference.fromJSON(object.chunk) : undefined, relevance: isSet(object.relevance) ? RelevantChunk_Relevance.fromJSON(object.relevance) : undefined, chunkBody: isSet(object.chunkBody) ? globalThis.String(object.chunkBody) : "", - documentMetadata: isSet(object.documentMetadata) ? ChunkMetadata.fromJSON(object.documentMetadata) : undefined, + documentMetadata: isSet(object.documentMetadata) ? 
ChunkMetadata.fromJSON(object.documentMetadata) : undefined }; }, @@ -555,21 +542,21 @@ export const RelevantChunk: MessageFns = { }, fromPartial, I>>(object: I): RelevantChunk { const message = createBaseRelevantChunk(); - message.source = (object.source !== undefined && object.source !== null) - ? SourceReference.fromPartial(object.source) - : undefined; - message.chunk = (object.chunk !== undefined && object.chunk !== null) - ? ChunkReference.fromPartial(object.chunk) - : undefined; - message.relevance = (object.relevance !== undefined && object.relevance !== null) - ? RelevantChunk_Relevance.fromPartial(object.relevance) - : undefined; + message.source = + object.source !== undefined && object.source !== null ? SourceReference.fromPartial(object.source) : undefined; + message.chunk = + object.chunk !== undefined && object.chunk !== null ? ChunkReference.fromPartial(object.chunk) : undefined; + message.relevance = + object.relevance !== undefined && object.relevance !== null + ? RelevantChunk_Relevance.fromPartial(object.relevance) + : undefined; message.chunkBody = object.chunkBody ?? ""; - message.documentMetadata = (object.documentMetadata !== undefined && object.documentMetadata !== null) - ? ChunkMetadata.fromPartial(object.documentMetadata) - : undefined; + message.documentMetadata = + object.documentMetadata !== undefined && object.documentMetadata !== null + ? ChunkMetadata.fromPartial(object.documentMetadata) + : undefined; return message; - }, + } }; function createBaseRelevantChunk_Relevance(): RelevantChunk_Relevance { @@ -627,7 +614,7 @@ export const RelevantChunk_Relevance: MessageFns = { const message = createBaseRelevantChunk_Relevance(); message.similarityScore = object.similarityScore ?? 0; return message; - }, + } }; function createBaseChunk(): Chunk { @@ -704,7 +691,7 @@ export const Chunk: MessageFns = { source: isSet(object.source) ? SourceReference.fromJSON(object.source) : undefined, chunk: isSet(object.chunk) ? 
ChunkReference.fromJSON(object.chunk) : undefined, chunkBody: isSet(object.chunkBody) ? globalThis.String(object.chunkBody) : "", - documentMetadata: isSet(object.documentMetadata) ? ChunkMetadata.fromJSON(object.documentMetadata) : undefined, + documentMetadata: isSet(object.documentMetadata) ? ChunkMetadata.fromJSON(object.documentMetadata) : undefined }; }, @@ -730,30 +717,34 @@ export const Chunk: MessageFns = { }, fromPartial, I>>(object: I): Chunk { const message = createBaseChunk(); - message.source = (object.source !== undefined && object.source !== null) - ? SourceReference.fromPartial(object.source) - : undefined; - message.chunk = (object.chunk !== undefined && object.chunk !== null) - ? ChunkReference.fromPartial(object.chunk) - : undefined; + message.source = + object.source !== undefined && object.source !== null ? SourceReference.fromPartial(object.source) : undefined; + message.chunk = + object.chunk !== undefined && object.chunk !== null ? ChunkReference.fromPartial(object.chunk) : undefined; message.chunkBody = object.chunkBody ?? ""; - message.documentMetadata = (object.documentMetadata !== undefined && object.documentMetadata !== null) - ? ChunkMetadata.fromPartial(object.documentMetadata) - : undefined; + message.documentMetadata = + object.documentMetadata !== undefined && object.documentMetadata !== null + ? ChunkMetadata.fromPartial(object.documentMetadata) + : undefined; return message; - }, + } }; type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin ? T - : T extends globalThis.Array ? globalThis.Array> - : T extends ReadonlyArray ? ReadonlyArray> - : T extends {} ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin + ? T + : T extends globalThis.Array + ? globalThis.Array> + : T extends ReadonlyArray + ? ReadonlyArray> + : T extends {} + ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? 
keyof T : never; -export type Exact = P extends Builtin ? P +export type Exact = P extends Builtin + ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function toTimestamp(date: Date): Timestamp { diff --git a/sdks/node/src/grpc/google/protobuf/struct.ts b/sdks/node/src/grpc/google/protobuf/struct.ts index 25549bb..509fe72 100644 --- a/sdks/node/src/grpc/google/protobuf/struct.ts +++ b/sdks/node/src/grpc/google/protobuf/struct.ts @@ -18,7 +18,7 @@ export const protobufPackage = "google.protobuf"; export enum NullValue { /** NULL_VALUE - Null value. */ NULL_VALUE = 0, - UNRECOGNIZED = -1, + UNRECOGNIZED = -1 } export function nullValueFromJSON(object: any): NullValue { @@ -73,25 +73,15 @@ export interface Struct_FieldsEntry { */ export interface Value { /** Represents a null value. */ - nullValue?: - | NullValue - | undefined; + nullValue?: NullValue | undefined; /** Represents a double value. */ - numberValue?: - | number - | undefined; + numberValue?: number | undefined; /** Represents a string value. */ - stringValue?: - | string - | undefined; + stringValue?: string | undefined; /** Represents a boolean value. */ - boolValue?: - | boolean - | undefined; + boolValue?: boolean | undefined; /** Represents a structured value. */ - structValue?: - | { [key: string]: any } - | undefined; + structValue?: { [key: string]: any } | undefined; /** Represents a repeated `Value`. */ listValue?: Array | undefined; } @@ -151,10 +141,10 @@ export const Struct: MessageFns & StructWrapperFns = { return { fields: isObject(object.fields) ? 
Object.entries(object.fields).reduce<{ [key: string]: any | undefined }>((acc, [key, value]) => { - acc[key] = value as any | undefined; - return acc; - }, {}) - : {}, + acc[key] = value as any | undefined; + return acc; + }, {}) + : {} }; }, @@ -184,7 +174,7 @@ export const Struct: MessageFns & StructWrapperFns = { } return acc; }, - {}, + {} ); return message; }, @@ -208,7 +198,7 @@ export const Struct: MessageFns & StructWrapperFns = { } } return object; - }, + } }; function createBaseStruct_FieldsEntry(): Struct_FieldsEntry { @@ -261,7 +251,7 @@ export const Struct_FieldsEntry: MessageFns = { fromJSON(object: any): Struct_FieldsEntry { return { key: isSet(object.key) ? globalThis.String(object.key) : "", - value: isSet(object?.value) ? object.value : undefined, + value: isSet(object?.value) ? object.value : undefined }; }, @@ -284,7 +274,7 @@ export const Struct_FieldsEntry: MessageFns = { message.key = object.key ?? ""; message.value = object.value ?? undefined; return message; - }, + } }; function createBaseValue(): Value { @@ -294,7 +284,7 @@ function createBaseValue(): Value { stringValue: undefined, boolValue: undefined, structValue: undefined, - listValue: undefined, + listValue: undefined }; } @@ -392,7 +382,7 @@ export const Value: MessageFns & AnyValueWrapperFns = { stringValue: isSet(object.stringValue) ? globalThis.String(object.stringValue) : undefined, boolValue: isSet(object.boolValue) ? globalThis.Boolean(object.boolValue) : undefined, structValue: isObject(object.structValue) ? object.structValue : undefined, - listValue: globalThis.Array.isArray(object.listValue) ? [...object.listValue] : undefined, + listValue: globalThis.Array.isArray(object.listValue) ? 
[...object.listValue] : undefined }; }, @@ -468,7 +458,7 @@ export const Value: MessageFns & AnyValueWrapperFns = { return null; } return undefined; - }, + } }; function createBaseListValue(): ListValue { @@ -540,19 +530,24 @@ export const ListValue: MessageFns & ListValueWrapperFns = { } else { return message as any; } - }, + } }; type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin ? T - : T extends globalThis.Array ? globalThis.Array> - : T extends ReadonlyArray ? ReadonlyArray> - : T extends {} ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin + ? T + : T extends globalThis.Array + ? globalThis.Array> + : T extends ReadonlyArray + ? ReadonlyArray> + : T extends {} + ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin ? P +export type Exact = P extends Builtin + ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function isObject(value: any): boolean { diff --git a/sdks/node/src/grpc/google/protobuf/timestamp.ts b/sdks/node/src/grpc/google/protobuf/timestamp.ts index 20a7698..69605ad 100644 --- a/sdks/node/src/grpc/google/protobuf/timestamp.ts +++ b/sdks/node/src/grpc/google/protobuf/timestamp.ts @@ -166,7 +166,7 @@ export const Timestamp: MessageFns = { fromJSON(object: any): Timestamp { return { seconds: isSet(object.seconds) ? globalThis.Number(object.seconds) : 0, - nanos: isSet(object.nanos) ? globalThis.Number(object.nanos) : 0, + nanos: isSet(object.nanos) ? globalThis.Number(object.nanos) : 0 }; }, @@ -189,19 +189,24 @@ export const Timestamp: MessageFns = { message.seconds = object.seconds ?? 0; message.nanos = object.nanos ?? 0; return message; - }, + } }; type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin ? T - : T extends globalThis.Array ? 
globalThis.Array> - : T extends ReadonlyArray ? ReadonlyArray> - : T extends {} ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin + ? T + : T extends globalThis.Array + ? globalThis.Array> + : T extends ReadonlyArray + ? ReadonlyArray> + : T extends {} + ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin ? P +export type Exact = P extends Builtin + ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function longToNumber(int64: { toString(): string }): number { diff --git a/sdks/node/src/grpc/search.ts b/sdks/node/src/grpc/search.ts index 026c452..b59b53b 100644 --- a/sdks/node/src/grpc/search.ts +++ b/sdks/node/src/grpc/search.ts @@ -7,17 +7,18 @@ /* eslint-disable */ import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire"; import { - type CallOptions, ChannelCredentials, Client, + makeGenericClientConstructor, + Metadata, + type CallOptions, type ClientOptions, type ClientUnaryCall, type handleUnaryCall, - makeGenericClientConstructor, - Metadata, type ServiceError, - type UntypedServiceImplementation, + type UntypedServiceImplementation } from "@grpc/grpc-js"; + import { Chunk, RelevantChunk } from "./chunks"; import { Struct } from "./google/protobuf/struct"; import { Timestamp } from "./google/protobuf/timestamp"; @@ -26,9 +27,7 @@ export const protobufPackage = "redactive.grpc.v2"; export interface Query { /** Search query for semantic content */ - semanticQuery?: - | string - | undefined; + semanticQuery?: string | undefined; /** Specific keywords to search for in source document */ keywordQuery?: string | undefined; } @@ -47,13 +46,9 @@ export interface Filters { */ scope: string[]; /** Timespan of response chunk's creation */ - created?: - | TimeSpan - | undefined; + created?: TimeSpan | undefined; /** Timespan of response chunk's last modification */ - modified?: - | TimeSpan - | undefined; + 
modified?: TimeSpan | undefined; /** List of user emails associated with response chunk */ userEmails: string[]; /** Include content from documents in trash */ @@ -62,13 +57,9 @@ export interface Filters { export interface SearchChunksRequest { /** How many results to try to return (maximum number of results) */ - count?: - | number - | undefined; + count?: number | undefined; /** The query to execute */ - query: - | Query - | undefined; + query: Query | undefined; /** Filters to apply to query */ filters?: Filters | undefined; } @@ -84,9 +75,7 @@ export interface SearchChunksResponse { /** Query was successful */ success: boolean; /** Error message if query failed */ - error?: - | { [key: string]: any } - | undefined; + error?: { [key: string]: any } | undefined; /** List of relevant chunks */ relevantChunks: RelevantChunk[]; } @@ -95,9 +84,7 @@ export interface GetDocumentResponse { /** Query was successful */ success: boolean; /** Error message if query failed */ - error?: - | { [key: string]: any } - | undefined; + error?: { [key: string]: any } | undefined; /** List of relevant chunks */ chunks: Chunk[]; } @@ -152,7 +139,7 @@ export const Query: MessageFns = { fromJSON(object: any): Query { return { semanticQuery: isSet(object.semanticQuery) ? globalThis.String(object.semanticQuery) : undefined, - keywordQuery: isSet(object.keywordQuery) ? globalThis.String(object.keywordQuery) : undefined, + keywordQuery: isSet(object.keywordQuery) ? globalThis.String(object.keywordQuery) : undefined }; }, @@ -175,7 +162,7 @@ export const Query: MessageFns = { message.semanticQuery = object.semanticQuery ?? undefined; message.keywordQuery = object.keywordQuery ?? undefined; return message; - }, + } }; function createBaseTimeSpan(): TimeSpan { @@ -228,7 +215,7 @@ export const TimeSpan: MessageFns = { fromJSON(object: any): TimeSpan { return { after: isSet(object.after) ? fromJsonTimestamp(object.after) : undefined, - before: isSet(object.before) ? 
fromJsonTimestamp(object.before) : undefined, + before: isSet(object.before) ? fromJsonTimestamp(object.before) : undefined }; }, @@ -251,7 +238,7 @@ export const TimeSpan: MessageFns = { message.after = object.after ?? undefined; message.before = object.before ?? undefined; return message; - }, + } }; function createBaseFilters(): Filters { @@ -344,7 +331,7 @@ export const Filters: MessageFns = { : [], includeContentInTrash: isSet(object.includeContentInTrash) ? globalThis.Boolean(object.includeContentInTrash) - : undefined, + : undefined }; }, @@ -374,16 +361,14 @@ export const Filters: MessageFns = { fromPartial, I>>(object: I): Filters { const message = createBaseFilters(); message.scope = object.scope?.map((e) => e) || []; - message.created = (object.created !== undefined && object.created !== null) - ? TimeSpan.fromPartial(object.created) - : undefined; - message.modified = (object.modified !== undefined && object.modified !== null) - ? TimeSpan.fromPartial(object.modified) - : undefined; + message.created = + object.created !== undefined && object.created !== null ? TimeSpan.fromPartial(object.created) : undefined; + message.modified = + object.modified !== undefined && object.modified !== null ? TimeSpan.fromPartial(object.modified) : undefined; message.userEmails = object.userEmails?.map((e) => e) || []; message.includeContentInTrash = object.includeContentInTrash ?? undefined; return message; - }, + } }; function createBaseSearchChunksRequest(): SearchChunksRequest { @@ -448,7 +433,7 @@ export const SearchChunksRequest: MessageFns = { return { count: isSet(object.count) ? globalThis.Number(object.count) : undefined, query: isSet(object.query) ? Query.fromJSON(object.query) : undefined, - filters: isSet(object.filters) ? Filters.fromJSON(object.filters) : undefined, + filters: isSet(object.filters) ? 
Filters.fromJSON(object.filters) : undefined }; }, @@ -472,12 +457,11 @@ export const SearchChunksRequest: MessageFns = { fromPartial, I>>(object: I): SearchChunksRequest { const message = createBaseSearchChunksRequest(); message.count = object.count ?? undefined; - message.query = (object.query !== undefined && object.query !== null) ? Query.fromPartial(object.query) : undefined; - message.filters = (object.filters !== undefined && object.filters !== null) - ? Filters.fromPartial(object.filters) - : undefined; + message.query = object.query !== undefined && object.query !== null ? Query.fromPartial(object.query) : undefined; + message.filters = + object.filters !== undefined && object.filters !== null ? Filters.fromPartial(object.filters) : undefined; return message; - }, + } }; function createBaseGetDocumentRequest(): GetDocumentRequest { @@ -530,7 +514,7 @@ export const GetDocumentRequest: MessageFns = { fromJSON(object: any): GetDocumentRequest { return { ref: isSet(object.ref) ? globalThis.String(object.ref) : "", - filters: isSet(object.filters) ? Filters.fromJSON(object.filters) : undefined, + filters: isSet(object.filters) ? Filters.fromJSON(object.filters) : undefined }; }, @@ -551,11 +535,10 @@ export const GetDocumentRequest: MessageFns = { fromPartial, I>>(object: I): GetDocumentRequest { const message = createBaseGetDocumentRequest(); message.ref = object.ref ?? ""; - message.filters = (object.filters !== undefined && object.filters !== null) - ? Filters.fromPartial(object.filters) - : undefined; + message.filters = + object.filters !== undefined && object.filters !== null ? Filters.fromPartial(object.filters) : undefined; return message; - }, + } }; function createBaseSearchChunksResponse(): SearchChunksResponse { @@ -622,7 +605,7 @@ export const SearchChunksResponse: MessageFns = { error: isObject(object.error) ? object.error : undefined, relevantChunks: globalThis.Array.isArray(object?.relevantChunks) ? 
object.relevantChunks.map((e: any) => RelevantChunk.fromJSON(e)) - : [], + : [] }; }, @@ -649,7 +632,7 @@ export const SearchChunksResponse: MessageFns = { message.error = object.error ?? undefined; message.relevantChunks = object.relevantChunks?.map((e) => RelevantChunk.fromPartial(e)) || []; return message; - }, + } }; function createBaseGetDocumentResponse(): GetDocumentResponse { @@ -714,7 +697,7 @@ export const GetDocumentResponse: MessageFns = { return { success: isSet(object.success) ? globalThis.Boolean(object.success) : false, error: isObject(object.error) ? object.error : undefined, - chunks: globalThis.Array.isArray(object?.chunks) ? object.chunks.map((e: any) => Chunk.fromJSON(e)) : [], + chunks: globalThis.Array.isArray(object?.chunks) ? object.chunks.map((e: any) => Chunk.fromJSON(e)) : [] }; }, @@ -741,7 +724,7 @@ export const GetDocumentResponse: MessageFns = { message.error = object.error ?? undefined; message.chunks = object.chunks?.map((e) => Chunk.fromPartial(e)) || []; return message; - }, + } }; export type SearchService = typeof SearchService; @@ -754,7 +737,7 @@ export const SearchService = { requestSerialize: (value: SearchChunksRequest) => Buffer.from(SearchChunksRequest.encode(value).finish()), requestDeserialize: (value: Buffer) => SearchChunksRequest.decode(value), responseSerialize: (value: SearchChunksResponse) => Buffer.from(SearchChunksResponse.encode(value).finish()), - responseDeserialize: (value: Buffer) => SearchChunksResponse.decode(value), + responseDeserialize: (value: Buffer) => SearchChunksResponse.decode(value) }, /** Query the index for all chunks of a specific document */ getDocument: { @@ -764,8 +747,8 @@ export const SearchService = { requestSerialize: (value: GetDocumentRequest) => Buffer.from(GetDocumentRequest.encode(value).finish()), requestDeserialize: (value: Buffer) => GetDocumentRequest.decode(value), responseSerialize: (value: GetDocumentResponse) => Buffer.from(GetDocumentResponse.encode(value).finish()), - 
responseDeserialize: (value: Buffer) => GetDocumentResponse.decode(value), - }, + responseDeserialize: (value: Buffer) => GetDocumentResponse.decode(value) + } } as const; export interface SearchServer extends UntypedServiceImplementation { @@ -779,34 +762,34 @@ export interface SearchClient extends Client { /** Query the index for relevant chunks */ searchChunks( request: SearchChunksRequest, - callback: (error: ServiceError | null, response: SearchChunksResponse) => void, + callback: (error: ServiceError | null, response: SearchChunksResponse) => void ): ClientUnaryCall; searchChunks( request: SearchChunksRequest, metadata: Metadata, - callback: (error: ServiceError | null, response: SearchChunksResponse) => void, + callback: (error: ServiceError | null, response: SearchChunksResponse) => void ): ClientUnaryCall; searchChunks( request: SearchChunksRequest, metadata: Metadata, options: Partial, - callback: (error: ServiceError | null, response: SearchChunksResponse) => void, + callback: (error: ServiceError | null, response: SearchChunksResponse) => void ): ClientUnaryCall; /** Query the index for all chunks of a specific document */ getDocument( request: GetDocumentRequest, - callback: (error: ServiceError | null, response: GetDocumentResponse) => void, + callback: (error: ServiceError | null, response: GetDocumentResponse) => void ): ClientUnaryCall; getDocument( request: GetDocumentRequest, metadata: Metadata, - callback: (error: ServiceError | null, response: GetDocumentResponse) => void, + callback: (error: ServiceError | null, response: GetDocumentResponse) => void ): ClientUnaryCall; getDocument( request: GetDocumentRequest, metadata: Metadata, options: Partial, - callback: (error: ServiceError | null, response: GetDocumentResponse) => void, + callback: (error: ServiceError | null, response: GetDocumentResponse) => void ): ClientUnaryCall; } @@ -818,14 +801,19 @@ export const SearchClient = makeGenericClientConstructor(SearchService, "redacti type Builtin = 
Date | Function | Uint8Array | string | number | boolean | undefined; -export type DeepPartial = T extends Builtin ? T - : T extends globalThis.Array ? globalThis.Array> - : T extends ReadonlyArray ? ReadonlyArray> - : T extends {} ? { [K in keyof T]?: DeepPartial } - : Partial; +export type DeepPartial = T extends Builtin + ? T + : T extends globalThis.Array + ? globalThis.Array> + : T extends ReadonlyArray + ? ReadonlyArray> + : T extends {} + ? { [K in keyof T]?: DeepPartial } + : Partial; type KeysOfUnion = T extends T ? keyof T : never; -export type Exact = P extends Builtin ? P +export type Exact = P extends Builtin + ? P : P & { [K in keyof P]: Exact } & { [K in Exclude>]: never }; function toTimestamp(date: Date): Timestamp { diff --git a/sdks/node/src/multiUserClient.test.ts b/sdks/node/src/multiUserClient.test.ts index c4f734f..1c7067e 100644 --- a/sdks/node/src/multiUserClient.test.ts +++ b/sdks/node/src/multiUserClient.test.ts @@ -203,5 +203,4 @@ describe("MultiUserClient", () => { expect(result).toEqual(chunks); expect(mockSearchClient.getDocument).toHaveBeenCalledWith({ accessToken: idToken, ref: documentName }); }); - }); diff --git a/sdks/node/src/multiUserClient.ts b/sdks/node/src/multiUserClient.ts index 6b4c8bb..b766d18 100644 --- a/sdks/node/src/multiUserClient.ts +++ b/sdks/node/src/multiUserClient.ts @@ -2,11 +2,7 @@ import { randomUUID } from "node:crypto"; import { AuthClient } from "./authClient"; import { Chunk, RelevantChunk } from "./grpc/chunks"; -import { - SearchChunksParams, - GetDocumentParams, - SearchClient -} from "./searchClient"; +import { GetDocumentParams, SearchChunksParams, SearchClient } from "./searchClient"; export interface UserData { signInState?: string; @@ -183,6 +179,6 @@ export class MultiUserClient { userData = await this._refreshUserData(userId, userData.refreshToken, undefined); } - return await this.searchClient.getDocument({ accessToken: userData.idToken!, ref, filters}); + return await 
this.searchClient.getDocument({ accessToken: userData.idToken!, ref, filters }); } } diff --git a/sdks/node/src/searchClient.ts b/sdks/node/src/searchClient.ts index 2e68974..759f586 100644 --- a/sdks/node/src/searchClient.ts +++ b/sdks/node/src/searchClient.ts @@ -60,12 +60,7 @@ export class SearchClient { * @param filters - An object of filters for querying. Optional. * @returns list of relevant chunks. */ - async searchChunks({ - accessToken, - query, - count = 10, - filters - }: SearchChunksParams): Promise { + async searchChunks({ accessToken, query, count = 10, filters }: SearchChunksParams): Promise { const requestMetadata = new Metadata(); requestMetadata.set("Authorization", `Bearer ${accessToken}`); requestMetadata.set("User-Agent", "redactive-sdk-node"); @@ -99,11 +94,7 @@ export class SearchClient { * @param filters - The filters for querying documents. Optional. Only applicable for getting by document name. * @returns The complete list of chunks for the matching document. */ - async getDocument({ - accessToken, - ref, - filters - }: GetDocumentParams): Promise { + async getDocument({ accessToken, ref, filters }: GetDocumentParams): Promise { const requestMetadata = new Metadata(); requestMetadata.set("Authorization", `Bearer ${accessToken}`); requestMetadata.set("User-Agent", "redactive-sdk-node"); diff --git a/sdks/python/src/redactive/grpc/v2/__init__.py b/sdks/python/src/redactive/grpc/v2/__init__.py index cdaedc9..d3adf77 100644 --- a/sdks/python/src/redactive/grpc/v2/__init__.py +++ b/sdks/python/src/redactive/grpc/v2/__init__.py @@ -17,7 +17,6 @@ import grpclib from betterproto.grpc.grpclib_server import ServiceBase - if TYPE_CHECKING: import grpclib.server from betterproto.grpc.grpclib_client import MetadataLike @@ -26,14 +25,10 @@ @dataclass(eq=False, repr=False) class ChunkMetadata(betterproto.Message): - created_at: Optional[datetime] = betterproto.message_field( - 1, optional=True, group="_created_at" - ) + created_at: Optional[datetime] = 
betterproto.message_field(1, optional=True, group="_created_at") """Chunk content's creation timestamp""" - modified_at: Optional[datetime] = betterproto.message_field( - 2, optional=True, group="_modified_at" - ) + modified_at: Optional[datetime] = betterproto.message_field(2, optional=True, group="_modified_at") """Chunk content's last modified timestamp""" link: Optional[str] = betterproto.string_field(3, optional=True, group="_link") @@ -65,9 +60,7 @@ class SourceReference(betterproto.Message): sharepoint file hash """ - document_path: Optional[str] = betterproto.string_field( - 6, optional=True, group="_document_path" - ) + document_path: Optional[str] = betterproto.string_field(6, optional=True, group="_document_path") """ Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide" or @@ -75,9 +68,7 @@ class SourceReference(betterproto.Message): Guide.pdf" """ - document_name: Optional[str] = betterproto.string_field( - 7, optional=True, group="_document_name" - ) + document_name: Optional[str] = betterproto.string_field(7, optional=True, group="_document_name") """Document name in the source system e.g. 
"document.txt""" @@ -138,25 +129,17 @@ class Chunk(betterproto.Message): @dataclass(eq=False, repr=False) class Query(betterproto.Message): - semantic_query: Optional[str] = betterproto.string_field( - 1, optional=True, group="_semantic_query" - ) + semantic_query: Optional[str] = betterproto.string_field(1, optional=True, group="_semantic_query") """Search query for semantic content""" - keyword_query: Optional[str] = betterproto.string_field( - 2, optional=True, group="_keyword_query" - ) + keyword_query: Optional[str] = betterproto.string_field(2, optional=True, group="_keyword_query") """Specific keywords to search for in source document""" @dataclass(eq=False, repr=False) class TimeSpan(betterproto.Message): - after: Optional[datetime] = betterproto.message_field( - 1, optional=True, group="_after" - ) - before: Optional[datetime] = betterproto.message_field( - 2, optional=True, group="_before" - ) + after: Optional[datetime] = betterproto.message_field(1, optional=True, group="_after") + before: Optional[datetime] = betterproto.message_field(2, optional=True, group="_before") @dataclass(eq=False, repr=False) @@ -171,14 +154,10 @@ class Filters(betterproto.Message): Documents/Engineering/Onboarding Guide.pdf' """ - created: Optional["TimeSpan"] = betterproto.message_field( - 2, optional=True, group="_created" - ) + created: Optional["TimeSpan"] = betterproto.message_field(2, optional=True, group="_created") """Timespan of response chunk's creation""" - modified: Optional["TimeSpan"] = betterproto.message_field( - 3, optional=True, group="_modified" - ) + modified: Optional["TimeSpan"] = betterproto.message_field(3, optional=True, group="_modified") """Timespan of response chunk's last modification""" user_emails: List[str] = betterproto.string_field(4) @@ -198,9 +177,7 @@ class SearchChunksRequest(betterproto.Message): query: "Query" = betterproto.message_field(2) """The query to execute""" - filters: Optional["Filters"] = betterproto.message_field( - 3, 
optional=True, group="_filters" - ) + filters: Optional["Filters"] = betterproto.message_field(3, optional=True, group="_filters") """Filters to apply to query""" @@ -209,9 +186,7 @@ class GetDocumentRequest(betterproto.Message): ref: str = betterproto.string_field(1) """A reference to the document to retrieve""" - filters: Optional["Filters"] = betterproto.message_field( - 2, optional=True, group="_filters" - ) + filters: Optional["Filters"] = betterproto.message_field(2, optional=True, group="_filters") """Query filters (only really for GetDocByTitle)""" @@ -220,8 +195,8 @@ class SearchChunksResponse(betterproto.Message): success: bool = betterproto.bool_field(1) """Query was successful""" - error: Optional["betterproto_lib_google_protobuf.Struct"] = ( - betterproto.message_field(2, optional=True, group="_error") + error: Optional["betterproto_lib_google_protobuf.Struct"] = betterproto.message_field( + 2, optional=True, group="_error" ) """Error message if query failed""" @@ -234,8 +209,8 @@ class GetDocumentResponse(betterproto.Message): success: bool = betterproto.bool_field(1) """Query was successful""" - error: Optional["betterproto_lib_google_protobuf.Struct"] = ( - betterproto.message_field(2, optional=True, group="_error") + error: Optional["betterproto_lib_google_protobuf.Struct"] = betterproto.message_field( + 2, optional=True, group="_error" ) """Error message if query failed""" @@ -250,7 +225,7 @@ async def search_chunks( *, timeout: Optional[float] = None, deadline: Optional["Deadline"] = None, - metadata: Optional["MetadataLike"] = None + metadata: Optional["MetadataLike"] = None, ) -> "SearchChunksResponse": return await self._unary_unary( "/redactive.grpc.v2.Search/SearchChunks", @@ -267,7 +242,7 @@ async def get_document( *, timeout: Optional[float] = None, deadline: Optional["Deadline"] = None, - metadata: Optional["MetadataLike"] = None + metadata: Optional["MetadataLike"] = None, ) -> "GetDocumentResponse": return await self._unary_unary( 
"/redactive.grpc.v2.Search/GetDocument", @@ -280,15 +255,10 @@ async def get_document( class SearchBase(ServiceBase): - - async def search_chunks( - self, search_chunks_request: "SearchChunksRequest" - ) -> "SearchChunksResponse": + async def search_chunks(self, search_chunks_request: "SearchChunksRequest") -> "SearchChunksResponse": raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) - async def get_document( - self, get_document_request: "GetDocumentRequest" - ) -> "GetDocumentResponse": + async def get_document(self, get_document_request: "GetDocumentRequest") -> "GetDocumentResponse": raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) async def __rpc_search_chunks( diff --git a/sdks/python/src/redactive/reranking/reranker.py b/sdks/python/src/redactive/reranking/reranker.py index 269b9b5..c277b6e 100644 --- a/sdks/python/src/redactive/reranking/reranker.py +++ b/sdks/python/src/redactive/reranking/reranker.py @@ -42,9 +42,7 @@ async def query_chunks( if big_fetch_count > self.conf.max_fetch_results: big_fetch_count = self.conf.max_fetch_results - fetched_chunks = await super().search_chunks( - access_token, query, big_fetch_count, filters - ) + fetched_chunks = await super().search_chunks(access_token, query, big_fetch_count, filters) ranker = Reranker(self.conf.reranking_algorithm) return self.rerank(query, fetched_chunks, ranker, count) diff --git a/sdks/python/src/redactive/search_client.py b/sdks/python/src/redactive/search_client.py index 1bbd291..cdd4663 100644 --- a/sdks/python/src/redactive/search_client.py +++ b/sdks/python/src/redactive/search_client.py @@ -6,10 +6,10 @@ from redactive.grpc.v2 import ( Chunk, Filters, - Query, GetDocumentRequest, - SearchChunksRequest, + Query, RelevantChunk, + SearchChunksRequest, SearchStub, ) @@ -91,4 +91,3 @@ async def get_document( request = GetDocumentRequest(ref=ref) response = await stub.get_document(request) return response.chunks - diff --git 
a/sdks/python/tests/unit_tests/multi_user_client_tests.py b/sdks/python/tests/unit_tests/multi_user_client_tests.py index 1df2dd9..65164c1 100644 --- a/sdks/python/tests/unit_tests/multi_user_client_tests.py +++ b/sdks/python/tests/unit_tests/multi_user_client_tests.py @@ -5,7 +5,7 @@ import pytest from redactive.auth_client import AuthClient -from redactive.grpc.v2 import Chunk, RelevantChunk +from redactive.grpc.v2 import RelevantChunk from redactive.multi_user_client import MultiUserClient, UserData from redactive.search_client import SearchClient @@ -99,9 +99,7 @@ async def test_search_chunks(multi_user_client: MultiUserClient, mock_search_cli result = await multi_user_client.search_chunks(user_id, query, count, filters=filters) assert result == relevant_chunks - multi_user_client.search_client.search_chunks.assert_called_with( - "idToken123", query, count, filters=filters - ) + multi_user_client.search_client.search_chunks.assert_called_with("idToken123", query, count, filters=filters) @pytest.mark.asyncio diff --git a/sdks/python/tests/unit_tests/search_client_tests.py b/sdks/python/tests/unit_tests/search_client_tests.py index d6cbe8b..483a922 100644 --- a/sdks/python/tests/unit_tests/search_client_tests.py +++ b/sdks/python/tests/unit_tests/search_client_tests.py @@ -36,7 +36,7 @@ async def test_query_chunks(mock_channel_context): @mock.patch("grpclib.client.Channel") @pytest.mark.asyncio async def test_query_chunks_with_filter(mock_channel_context): - from redactive.grpc.v2 import Query, SearchChunksRequest, Filters + from redactive.grpc.v2 import Filters, Query, SearchChunksRequest access_token = "test-access_token" query = "Tell me about somethings" From 890339b9497e2d1f71c3a12c07bd72e886001934 Mon Sep 17 00:00:00 2001 From: Angus White Date: Wed, 20 Nov 2024 16:41:56 +1100 Subject: [PATCH 11/12] Update doco even more --- sdks/node/README.md | 42 +++++++++++++++++++++++++++++------- sdks/node/src/index.ts | 1 + sdks/python/README.md | 49 
+++++++++++++++++++++++++++++++++--------- 3 files changed, 74 insertions(+), 18 deletions(-) diff --git a/sdks/node/README.md b/sdks/node/README.md index 4281ac2..f6746ce 100644 --- a/sdks/node/README.md +++ b/sdks/node/README.md @@ -7,7 +7,7 @@ The Redactive Node SDK provides a robust and intuitive interface for interacting In order to use the package to integrate with Redactive.ai, run: ```sh -npm install redactive +npm install @redactive/redactive ``` There is no need to clone this repository. @@ -35,21 +35,47 @@ The library has following components. AuthClient needs to be configured with your account's API key which is available in the Apps page at [Redactive Dashboard](https://dashboard.redactive.ai/). +The AuthClient can be used to present users with the data providers' OAuth consent +pages: + ```javascript import { AuthClient } from "@redactive/redactive"; +// Construct AuthClient using your Redactive API key +const client = new AuthClient( + "YOUR-API-KEY-HERE" +) + // Establish an connection to data source -// Possible data sources: confluence, google-drive, jira, zendesk, slack, sharepoint -const redirectUri = "https://url-debugger.vercel.app"; +// Possible data sources: confluence, sharepoint +const redirectUri = "YOUR-REDIRECT-URI"; const provider = "confluence"; const signInUrl = await client.beginConnection({ provider, redirectUri }); -// Navigate User to signInUrl -// User will receive an oauth2 auth code after consenting the app's data source access permissions. -// Use this code to exchange Redactive access_token with Redactive API -const response = await client.exchangeTokens("OAUTH2-AUTH-CODE"); + +// Now redirect your user to signInUrl ``` +The user will be redirected back to your app's configured redirect uri after they have completed the steps on +the data provider's OAuth consent page. There will be a signin code present in the `code` parameter of the query string e.g. +`https://your-redirect-page.com?code=abcde12345`. 
+ +This code may be exchanged for a user access token (which the user may use to issue queries against their data): + +```javascript +// Exchange signin code for a Redactive ID token +const response = await client.exchangeTokens({code: "SIGNIN-CODE"}); +const accessToken = response.idToken +``` + +Once a user has completed the OAuth flow, the data source should show up in their connected data sources: + +```javascript +await client.listConnections({accessToken}).connections === [ "confluence" ] // ✅ +``` + +Use the `list_connections` method to keep your user's connection status up to date, and provide mechanisms to re-connect data sources. + ### SearchClient With the Redactive `access_token`, you can perform two types of search @@ -96,7 +122,7 @@ results from that provider are returned. Filters may be populated and provided to a query in the following way for the NodeJS SDK: ```javascript -import { Filters } from "@redactive/redactive/grpc/search"; +import { Filters } from "@redactive/redactive"; // Query chunks from Confluence only, that are from documents created before last week, modified since last week, // and that are from documents associated with a user's email. Include chunks from trashed documents. diff --git a/sdks/node/src/index.ts b/sdks/node/src/index.ts index 311828a..9edd48d 100644 --- a/sdks/node/src/index.ts +++ b/sdks/node/src/index.ts @@ -1,3 +1,4 @@ export { SearchClient } from "./searchClient"; export { AuthClient } from "./authClient"; export { MultiUserClient } from "./multiUserClient"; +export {Filters} from './grpc/search'; diff --git a/sdks/python/README.md b/sdks/python/README.md index 615713e..f3752f8 100644 --- a/sdks/python/README.md +++ b/sdks/python/README.md @@ -36,24 +36,53 @@ The library has the following components: AuthClient needs to be configured with your account's API key which is available in the Apps page at [Redactive Dashboard](https://dashboard.redactive.ai/). 
+The AuthClient can be used to present users with the data providers' OAuth consent +pages: + ```python from redactive.auth_client import AuthClient -client = AuthClient(api_key="API-KEY") +client = AuthClient(api_key="YOUR-APP'S-API-KEY") + +# This value must _exactly_ match the redirect URI you provided when creating your +# Redactive app. +redirect_uri = "YOUR-APP'S-REDIRECT-URI" + +# Possible data sources: confluence, sharepoint +provider = "confluence" -# Establish an connection to data source -# Possible data sources: confluence, google-drive, jira, zendesk, slack, sharepoint -redirect_uri = "https://url-debugger.vercel.app" sign_in_url = await client.begin_connection( - provider="confluence", redirect_uri=redirect_uri + provider=provider, redirect_uri=redirect_uri ) -# Navigate User to sign_in_url -# User will receive an oauth2 auth code after consenting the app's data source access permissions. -# Use this code to exchange Redactive access_token with Redactive API -response = await client.exchange_tokens(code="OAUTH2-TOKEN") +# Now redirect your user to sign_in_url ``` +The user will be redirected back to your app's configured redirect uri after they have completed the steps on +the data provider's OAuth consent page. There will be a signin code present in the `code` parameter of the query string e.g. +`https://your-redirect-page.com?code=abcde12345`. 
+ +This code may be exchanged for a user access token (which the user may use to issue queries against their data): + +```python +# Exchange signin code for a Redactive ID token +response = await client.exchange_tokens(code="SIGNIN-CODE") +access_token = response.idToken +``` + +Once a user has completed the OAuth flow, the data source should show up in their connected data sources: + +```python +response = await client.list_connections( + access_token=access_token +) + +assert "confluence" in response.connections # ✅ +``` + +Use the `list_connections` method to keep your user's connection status up to date, and provide mechanisms to re-connect data sources. + + ### SearchClient With a Redactive `access_token`, you can perform two types of search @@ -69,7 +98,7 @@ client = SearchClient() # Semantic Search: retrieve text extracts (chunks) from various documents pertaining to the user query client.search_chunks( - access_token="REDACTIVE-USER-ACCESS-TOKEN", + access_token=access_token, query="Tell me about AI" ) ``` From 9d061b04615cc780afac74083939dee975d5b63f Mon Sep 17 00:00:00 2001 From: Angus White Date: Wed, 20 Nov 2024 16:42:41 +1100 Subject: [PATCH 12/12] Format codebase --- sdks/node/README.md | 15 ++++++--------- sdks/node/src/index.ts | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/sdks/node/README.md b/sdks/node/README.md index f6746ce..b1ed8c0 100644 --- a/sdks/node/README.md +++ b/sdks/node/README.md @@ -42,9 +42,7 @@ pages: import { AuthClient } from "@redactive/redactive"; // Construct AuthClient using your Redactive API key -const client = new AuthClient( - "YOUR-API-KEY-HERE" -) +const client = new AuthClient("YOUR-API-KEY-HERE"); // Establish an connection to data source // Possible data sources: confluence, sharepoint @@ -52,26 +50,25 @@ const redirectUri = "YOUR-REDIRECT-URI"; const provider = "confluence"; const signInUrl = await client.beginConnection({ provider, redirectUri }); - -// Now redirect your user to signInUrl 
+// Now redirect your user to signInUrl
 ```
 
 The user will be redirected back to your app's configured redirect uri after they have completed the steps on
 the data provider's OAuth consent page. There will be a signin code present in the `code` parameter of the query string e.g.
-`https://your-redirect-page.com?code=abcde12345`. 
+`https://your-redirect-page.com?code=abcde12345`.
 
 This code may be exchanged for a user access token (which the user may use to issue queries against their data):
 
 ```javascript
 // Exchange signin code for a Redactive ID token
-const response = await client.exchangeTokens({code: "SIGNIN-CODE"});
-const accessToken = response.idToken
+const response = await client.exchangeTokens({ code: "SIGNIN-CODE" });
+const accessToken = response.idToken;
 ```
 
 Once a user has completed the OAuth flow, the data source should show up in their connected data sources:
 
 ```javascript
-await client.listConnections({accessToken}).connections === [ "confluence" ] // ✅
+(await client.listConnections({ accessToken })).connections === ["confluence"]; // ✅
 ```
 
 Use the `list_connections` method to keep your user's connection status up to date, and provide mechanisms to re-connect data sources.
diff --git a/sdks/node/src/index.ts b/sdks/node/src/index.ts
index 9edd48d..8468431 100644
--- a/sdks/node/src/index.ts
+++ b/sdks/node/src/index.ts
@@ -1,4 +1,4 @@
 export { SearchClient } from "./searchClient";
 export { AuthClient } from "./authClient";
 export { MultiUserClient } from "./multiUserClient";
-export {Filters} from './grpc/search';
+export { Filters } from "./grpc/search";