Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API v2 #48

Merged
merged 12 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions protos/chunks.proto
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
syntax = "proto3";

package redactive.grpc.v1;
package redactive.grpc.v2;

import "google/protobuf/timestamp.proto";


message ChunkMetadata {
// Chunk content's creation timestamp
optional google.protobuf.Timestamp created_at = 1;
Expand All @@ -13,17 +14,18 @@ message ChunkMetadata {
}

message SourceReference {
// Source system of the document e.g. confluence, slack, google-drive
// Source system of the document e.g. confluence, sharepoint
string system = 1;
// Version of the source system e.g. 1.0.0
string system_version = 2;
// Connection id to the source system e.g. confluence space id, slack channel id, google-drive drive id
// Connection id to the source system e.g. confluence space id, sharepoint drive id
string connection_id = 3;
// Document id in the source system e.g. confluence page id, slack message id, google-drive document id
// Document id in the source system e.g. confluence page id, sharepoint file id
string document_id = 4;
// Document version in the source system e.g. confluence page version, slack message version, google-drive document version
// Document version in the source system e.g. confluence page version, sharepoint file hash
string document_version = 5;
// Document path in the source system e.g. "My Drive/document.txt", "slack-channel-name"
// Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide"
// or "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf"
optional string document_path = 6;
// Document name in the source system e.g. "document.txt"
optional string document_name = 7;
Expand Down
59 changes: 21 additions & 38 deletions protos/search.proto
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
syntax = "proto3";

package redactive.grpc.v1;
package redactive.grpc.v2;

import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

import "chunks.proto";

service Search {
// Query the index for relevant chunks
rpc QueryChunks(QueryRequest) returns (QueryResponse);
rpc SearchChunks(SearchChunksRequest) returns (SearchChunksResponse);
// Query the index for all chunks of a specific document
rpc QueryChunksByDocumentName(QueryByDocumentNameRequest) returns (QueryByDocumentNameResponse);
// Get chunks by URL
rpc GetChunksByUrl(GetChunksByUrlRequest) returns (GetChunksByUrlResponse);
rpc GetDocument(GetDocumentRequest) returns (GetDocumentResponse);
}

message Query {
// Semantic query to execute
string semantic_query = 1;
}

message DocumentNameQuery {
// Document name to search for
string document_name = 1;
// Search query for semantic content
optional string semantic_query = 1;
// Specific keywords to search for in source document
optional string keyword_query = 2;
}

message TimeSpan {
Expand All @@ -32,7 +26,10 @@ message TimeSpan {
}

message Filters {
// Scope e.g. "confluence", "slack://channel-name", "google-drive://CompanyDrive/document.docx"
// Scope of the query. This may either be the name of a fetcher, or a subspace of documents.
at-white marked this conversation as resolved.
Show resolved Hide resolved
// Subspaces take the form of <provider>://<tenancy>/<path>
// e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide'
// for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf'
repeated string scope = 1;
// Timespan of response chunk's creation
optional TimeSpan created = 2;
Expand All @@ -44,7 +41,7 @@ message Filters {
optional bool include_content_in_trash = 5;
}

message QueryRequest {
message SearchChunksRequest {
// How many results to try to return (maximum number of results)
optional uint32 count = 1;
// The query to execute
Expand All @@ -53,7 +50,14 @@ message QueryRequest {
optional Filters filters = 3;
}

message QueryResponse {
message GetDocumentRequest {
// A reference to the document to retrieve
string ref = 1;
// Query filters (only really for GetDocByTitle)
optional Filters filters = 2;
}

message SearchChunksResponse {
// Query was successful
bool success = 1;
// Error message if query failed
Expand All @@ -62,28 +66,7 @@ message QueryResponse {
repeated RelevantChunk relevant_chunks = 3;
}

message GetChunksByUrlRequest {
// URL to document
string url = 1;
}

message GetChunksByUrlResponse {
// Fetch was successful
bool success = 1;
// Error message if fetch failed
optional google.protobuf.Struct error = 2;
// List of chunks
repeated Chunk chunks = 3;
}

message QueryByDocumentNameRequest {
// The query to execute
DocumentNameQuery query = 2;
// Filters to apply to query
optional Filters filters = 3;
}

message QueryByDocumentNameResponse {
message GetDocumentResponse {
// Query was successful
bool success = 1;
// Error message if query failed
Expand Down
109 changes: 81 additions & 28 deletions sdks/node/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ The Redactive Node SDK provides a robust and intuitive interface for interacting
In order to use the package to integrate with Redactive.ai, run:

```sh
npm install redactive
npm install @redactive/redactive
```

There is no need to clone this repository.
Expand Down Expand Up @@ -35,54 +35,91 @@ The library has following components.
AuthClient needs to be configured with your account's API key which is
available in the Apps page at [Redactive Dashboard](https://dashboard.redactive.ai/).

The AuthClient can be used to present users with the data providers' OAuth consent
pages:

```javascript
import { AuthClient } from "@redactive/redactive";

// Construct AuthClient using your Redactive API key
const client = new AuthClient("YOUR-API-KEY-HERE");

// Establish a connection to a data source
// Possible data sources: confluence, google-drive, jira, zendesk, slack, sharepoint
const redirectUri = "https://url-debugger.vercel.app";
// Possible data sources: confluence, sharepoint
const redirectUri = "YOUR-REDIRECT-URI";
const provider = "confluence";
const signInUrl = await client.beginConnection({ provider, redirectUri });

// Navigate User to signInUrl
// User will receive an oauth2 auth code after consenting the app's data source access permissions.
// Use this code to exchange Redactive access_token with Redactive API
const response = await client.exchangeTokens("OAUTH2-AUTH-CODE");
// Now redirect your user to signInUrl
```

The user will be redirected back to your app's configured redirect URI after they have completed the steps on
the data provider's OAuth consent page. A sign-in code will be present in the `code` parameter of the query string, e.g.
`https://your-redirect-page.com?code=abcde12345`.

This code may be exchanged for a user access token (which the user may use to issue queries against their data):

```javascript
// Exchange signin code for a Redactive ID token
const response = await client.exchangeTokens({ code: "SIGNIN-CODE" });
const accessToken = response.idToken;
```

Once a user has completed the OAuth flow, the data source should show up in their connected data sources:

```javascript
(await client.listConnections({ accessToken }).connections) === ["confluence"]; // ✅
```

Use the `listConnections` method to keep your users' connection status up to date, and provide mechanisms to re-connect data sources.

### SearchClient

With the Redactive access_token, you can perform three types of searches using the Redactive Search service:
With the Redactive `access_token`, you can perform two types of search:

1. **Semantic Query Search**: Retrieve relevant chunks of information that are semantically related to a user query.
2. **URL-based Search**: Obtain all the chunks from a document by specifying its URL.
3. **Document Name Search**: Query for all the chunks from a document based on the name of the document.
#### Query-based Search

Retrieve relevant chunks of information that are related to a user query.

```javascript
import { SearchClient } from "@redactive/redactive";

const client = new SearchClient();
const accessToken = "REDACTIVE-ACCESS-TOKEN";

// Semantic Search: retrieve text extracts (chunks) from various documents pertaining to the user query
const semanticQuery = "Tell me about AI";
await client.queryChunks({ accessToken, semanticQuery });

// URL-based Search: retrieve all chunks of the document at that URL
const url = "https://example.com/document";
await client.getChunksByUrl({ accessToken, url });
// Query-based Search: retrieve text extracts (chunks) from various documents pertaining to the user query
const query = "Tell me about AI";
await client.searchChunks({ accessToken, query });
```

// Document Name Search : retrieve all chunks of a document identified by its name
const documentName = "AI Research Paper";
await client.queryChunksByDocumentName({ accessToken, documentName });
**Filters** may be applied to query-based search operations. At present, the following fields may be provided as filter predicates:

```protobuf
message Filters {
// Scope of the query. This may either be the name of a provider, or a subspace of documents.
// Subspaces take the form of <provider>://<tenancy>/<path>
// e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide'
// for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf'
repeated string scope = 1;
// Timespan of response chunk's creation
optional TimeSpan created = 2;
// Timespan of response chunk's last modification
optional TimeSpan modified = 3;
// List of user emails associated with response chunk
repeated string user_emails = 4;
// Include content from documents in trash
optional bool include_content_in_trash = 5;
}
```

### Filters
The query will only return results which match _ALL_ filter predicates i.e. if multiple fields are populated in the filter object,
the resulting filter is the logical 'AND' of all the fields. If a data source provider does not support a filter-type, then no
results from that provider are returned.

Query methods, i.e. `queryChunks`, `queryChunksByDocumentName`, support a set of optional filters. The filters are applied in a logical 'AND' operation. If a data source provider does not support a filter-type, then no results from that provider are returned.
Filters may be populated and provided to a query in the following way for the NodeJS SDK:

```typescript
import { Filters } from "@redactive/redactive/grpc/search";
```javascript
import { Filters } from "@redactive/redactive";

// Query chunks from Confluence only, that are from documents created before last week, modified since last week,
// and that are from documents associated with a user's email. Include chunks from trashed documents.
Expand All @@ -98,7 +135,23 @@ const filters: Filters = {
userEmails: ["[email protected]"],
includeContentInTrash: true
};
await client.queryChunks({ accessToken, semanticQuery, filters });
await client.searchChunks({ accessToken, semanticQuery, filters });

```

#### Document Fetch

Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL).

```javascript
import { SearchClient } from "@redactive/redactive";

const client = new SearchClient();
const accessToken = "REDACTIVE-ACCESS-TOKEN";

// URL-based Search: retrieve all chunks of the document at that URL
const url = "https://example.com/document";
await client.getDocument({ accessToken, url });
```

### Multi-User Client
Expand All @@ -124,8 +177,8 @@ let [signInCode, state] = ["", ""]; // from URL query parameters
const isConnectionSuccessful = await multiUserClient.handleConnectionCallback(userId, signInCode, state);

// User can now use Redactive search service via `MultiUserClient`'s other methods:
const semanticQuery = "Tell me about the missing research vessel, the Borealis";
const chunks = await multiUserClient.queryChunks({ userId, semanticQuery });
const query = "Tell me about the missing research vessel, the Borealis";
const chunks = await multiUserClient.searchChunks({ userId, query });
```

## Development
Expand Down
Loading