forked from nddipiazza/tika-pipes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtika.proto
357 lines (313 loc) · 11.1 KB
/
tika.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

// All messages and the service below are declared in the "tika" package.
package tika;

// Per-language code generation options, listed alphabetically.
option go_package = "apache.org/tika";
option java_multiple_files = true;
option java_outer_classname = "TikaProto";
option java_package = "org.apache.tika";
// NOTE(review): "HLW" does not look Tika-specific and may be a leftover from
// a sample project -- confirm it is intentional. It is left unchanged here
// because altering it would rename generated Objective-C classes.
option objc_class_prefix = "HLW";
// The Tika gRPC service definition.
//
// Groups the RPCs for managing stored fetchers, emitters and pipe iterators,
// for fetching and parsing items, and for running pipe jobs.
service Tika {
  // Saves a fetcher to the fetcher store.
  rpc SaveFetcher(SaveFetcherRequest)
      returns (SaveFetcherReply) {}

  // Gets a fetcher's data from the fetcher store.
  rpc GetFetcher(GetFetcherRequest)
      returns (GetFetcherReply) {}

  // Lists the fetchers that are currently in the fetcher store.
  rpc ListFetchers(ListFetchersRequest)
      returns (ListFetchersReply) {}

  // Deletes a fetcher from the fetcher store.
  rpc DeleteFetcher(DeleteFetcherRequest)
      returns (DeleteFetcherReply) {}

  // Sends a FetchAndParse request using a fetcher from the fetcher store.
  // Fetches, parses, and returns the FetchParseTuple data output from Tika
  // Pipes. This is a synchronous call that immediately returns 1 result.
  rpc FetchAndParse(FetchAndParseRequest)
      returns (FetchAndParseReply) {}

  // Sends a FetchAndParse request using a fetcher from the fetcher store.
  // Fetches, parses, and returns the FetchParseTuple data output from Tika
  // Pipes. The data is streamed from the server in response.
  rpc FetchAndParseServerSideStreaming(FetchAndParseRequest)
      returns (stream FetchAndParseReply) {}

  // Sends FetchAndParse requests using a fetcher from the fetcher store.
  // Fetches, parses, and returns the FetchParseTuple data output from Tika
  // Pipes. Serves a bi-directional stream of fetch inputs and parsed outputs.
  rpc FetchAndParseBiDirectionalStreaming(stream FetchAndParseRequest)
      returns (stream FetchAndParseReply) {}

  // Gets the fetcher config schema for a given fetcher.
  rpc GetFetcherConfigJsonSchema(GetFetcherConfigJsonSchemaRequest)
      returns (GetFetcherConfigJsonSchemaReply) {}

  // Saves an emitter to the emitter store.
  rpc SaveEmitter(SaveEmitterRequest)
      returns (SaveEmitterReply) {}

  // Gets an emitter's data from the emitter store.
  rpc GetEmitter(GetEmitterRequest)
      returns (GetEmitterReply) {}

  // Lists the emitters that are currently in the emitter store.
  rpc ListEmitters(ListEmittersRequest)
      returns (ListEmittersReply) {}

  // Deletes an emitter from the emitter store.
  rpc DeleteEmitter(DeleteEmitterRequest)
      returns (DeleteEmitterReply) {}

  // Gets the emitter config schema for a given emitter.
  rpc GetEmitterConfigJsonSchema(GetEmitterConfigJsonSchemaRequest)
      returns (GetEmitterConfigJsonSchemaReply) {}

  // Saves a pipe iterator to the pipe iterator store.
  rpc SavePipeIterator(SavePipeIteratorRequest)
      returns (SavePipeIteratorReply) {}

  // Gets a pipe iterator's data from the pipe iterator store.
  rpc GetPipeIterator(GetPipeIteratorRequest)
      returns (GetPipeIteratorReply) {}

  // Lists the pipe iterators that are currently in the pipe iterator store.
  rpc ListPipeIterators(ListPipeIteratorsRequest)
      returns (ListPipeIteratorsReply) {}

  // Deletes a pipe iterator from the pipe iterator store.
  rpc DeletePipeIterator(DeletePipeIteratorRequest)
      returns (DeletePipeIteratorReply) {}

  // Gets the pipe iterator config schema for a given pipe iterator.
  rpc GetPipeIteratorConfigJsonSchema(GetPipeIteratorConfigJsonSchemaRequest)
      returns (GetPipeIteratorConfigJsonSchemaReply) {}

  // Runs a pipe job.
  rpc RunPipeJob(RunPipeJobRequest)
      returns (RunPipeJobReply) {}

  // Gets a pipe job's state.
  rpc GetPipeJob(GetPipeJobRequest)
      returns (GetPipeJobReply) {}
}
// Request message for the SaveFetcher RPC.
message SaveFetcherRequest {
  // Unique identifier of the fetcher. If a fetcher with this ID already
  // exists, the operation overwrites it.
  string fetcher_id = 1;

  // PF4J plugin ID of the fetcher.
  string plugin_id = 2;

  // The FetcherConfig object as a JSON string.
  string fetcher_config_json = 3;
}
// Reply message for the SaveFetcher RPC.
message SaveFetcherReply {
  // ID of the fetcher that was saved.
  string fetcher_id = 1;
}
// Request message shared by the FetchAndParse, FetchAndParseServerSideStreaming
// and FetchAndParseBiDirectionalStreaming RPCs.
message FetchAndParseRequest {
  // ID of the fetcher to use for the fetch. The fetcher must already be in
  // the fetcher store (previously saved by SaveFetcher).
  string fetcher_id = 1;

  // The "Fetch Key" of the item that will be fetched.
  string fetch_key = 2;

  // Additional metadata for cases where the fetch key alone is not
  // sufficient to fetch the item.
  string fetch_metadata_json = 3;

  // Metadata to add to the parse output.
  string added_metadata_json = 4;
}
// A collection of metadata entries, each a string key mapped to a string value.
message Metadata {
  // The metadata entries. Map iteration order is not defined by protobuf.
  map<string, string> fields = 1;
}
// Reply message shared by the FetchAndParse, FetchAndParseServerSideStreaming
// and FetchAndParseBiDirectionalStreaming RPCs.
message FetchAndParseReply {
  // Echo of the fetch_key that was sent in the request.
  string fetch_key = 1;

  // Metadata fields from the parse output, as a list.
  repeated Metadata metadata = 2;

  // Status of the message. See the javadoc for
  // org.apache.tika.pipes.PipesResult.STATUS for the list of statuses.
  string status = 3;

  // The error message, if there was an error.
  string error_message = 4;

  // Estimated size in bytes (presumably of this reply -- confirm).
  // NOTE(review): this is the only camelCase field name in the file; every
  // other field is lower_snake_case. It is kept as-is because renaming it
  // would change generated accessor names in some target languages.
  int64 estimatedSizeBytes = 5;
}
// Request message for the DeleteFetcher RPC.
message DeleteFetcherRequest {
  // ID of the fetcher to delete.
  string fetcher_id = 1;
}
// Reply message for the DeleteFetcher RPC.
message DeleteFetcherReply {
  // True if the fetcher was successfully removed from the fetcher store.
  bool success = 1;
}
// Request message for the GetFetcher RPC.
message GetFetcherRequest {
  // ID of the fetcher whose config should be returned.
  string fetcher_id = 1;
}
// Reply message for the GetFetcher RPC. Also used as the element type of
// ListFetchersReply.
message GetFetcherReply {
  // Echo of the ID of the fetcher being returned.
  string fetcher_id = 1;

  // Plugin ID of the fetcher.
  string plugin_id = 2;

  // The fetcher's configuration parameters.
  map<string, string> params = 3;
}
// Request message for the ListFetchers RPC.
message ListFetchersRequest {
  // Page number at which to start listing fetchers.
  // NOTE(review): whether pages are 0-based or 1-based is not stated here.
  int32 page_number = 1;

  // Number of fetchers to list per page.
  int32 num_fetchers_per_page = 2;
}
// Reply message for the ListFetchers RPC.
message ListFetchersReply {
  // The fetcher configs returned by the ListFetchers RPC.
  repeated GetFetcherReply get_fetcher_replies = 1;
}
// Request message for the GetFetcherConfigJsonSchema RPC.
message GetFetcherConfigJsonSchemaRequest {
  // Plugin ID of the fetcher whose config JSON schema should be returned.
  string plugin_id = 1;
}
// Reply message for the GetFetcherConfigJsonSchema RPC.
message GetFetcherConfigJsonSchemaReply {
  // JSON schema describing the fetcher config, in string format.
  string fetcher_config_json_schema = 1;
}
// Request message for the SaveEmitter RPC.
message SaveEmitterRequest {
  // Unique identifier of the emitter. If an emitter with this ID already
  // exists, the operation overwrites it.
  string emitter_id = 1;

  // PF4J plugin ID of the emitter.
  string plugin_id = 2;

  // The EmitterConfig object as a JSON string.
  string emitter_config_json = 3;
}
// Reply message for the SaveEmitter RPC.
message SaveEmitterReply {
  // ID of the emitter that was saved.
  string emitter_id = 1;
}
// Request message for the GetEmitter RPC.
message GetEmitterRequest {
  // ID of the emitter whose config should be returned.
  string emitter_id = 1;
}
// Reply message for the GetEmitter RPC. Also used as the element type of
// ListEmittersReply.
message GetEmitterReply {
  // Echo of the ID of the emitter being returned.
  string emitter_id = 1;

  // PF4J plugin ID of the emitter.
  string plugin_id = 2;

  // The emitter's configuration parameters.
  map<string, string> params = 3;
}
// Request message for the ListEmitters RPC.
message ListEmittersRequest {
  // Page number at which to start listing emitters.
  // NOTE(review): whether pages are 0-based or 1-based is not stated here.
  int32 page_number = 1;

  // Number of emitters to list per page.
  int32 num_emitters_per_page = 2;
}
// Reply message for the ListEmitters RPC.
message ListEmittersReply {
  // The emitter configs returned by the ListEmitters RPC.
  repeated GetEmitterReply get_emitter_replies = 1;
}
// Request message for the DeleteEmitter RPC.
message DeleteEmitterRequest {
  // ID of the emitter to delete.
  string emitter_id = 1;
}
// Reply message for the DeleteEmitter RPC.
message DeleteEmitterReply {
  // True if the emitter was successfully removed from the emitter store.
  bool success = 1;
}
// Request message for the GetEmitterConfigJsonSchema RPC.
message GetEmitterConfigJsonSchemaRequest {
  // PF4J plugin ID of the emitter whose config JSON schema should be returned.
  string plugin_id = 1;
}
// Reply message for the GetEmitterConfigJsonSchema RPC.
message GetEmitterConfigJsonSchemaReply {
  // JSON schema describing the emitter config, in string format.
  string emitter_config_json_schema = 1;
}
// Request message for the SavePipeIterator RPC.
message SavePipeIteratorRequest {
  // Unique identifier of the pipe iterator. If a pipe iterator with this ID
  // already exists, the operation overwrites it.
  string pipe_iterator_id = 1;

  // PF4J plugin ID of the pipe iterator.
  string plugin_id = 2;

  // The PipeIteratorConfig object as a JSON string.
  string pipe_iterator_config_json = 3;
}
// Reply message for the SavePipeIterator RPC.
message SavePipeIteratorReply {
  // ID of the pipe iterator that was saved.
  string pipe_iterator_id = 1;
}
// Request message for the GetPipeIterator RPC.
message GetPipeIteratorRequest {
  // ID of the pipe iterator whose config should be returned.
  string pipe_iterator_id = 1;
}
// Reply message for the GetPipeIterator RPC. Also used as the element type of
// ListPipeIteratorsReply.
message GetPipeIteratorReply {
  // Echo of the ID of the pipe iterator being returned.
  string pipe_iterator_id = 1;

  // PF4J plugin ID of the pipe iterator.
  string plugin_id = 2;

  // The pipe iterator's configuration parameters.
  map<string, string> params = 3;
}
// Request message for the ListPipeIterators RPC.
message ListPipeIteratorsRequest {
  // Page number at which to start listing pipe iterators.
  // NOTE(review): whether pages are 0-based or 1-based is not stated here.
  int32 page_number = 1;

  // Number of pipe iterators to list per page.
  int32 num_pipe_iterators_per_page = 2;
}
// Reply message for the ListPipeIterators RPC.
message ListPipeIteratorsReply {
  // The pipe iterator configs returned by the ListPipeIterators RPC.
  repeated GetPipeIteratorReply get_pipe_iterator_replies = 1;
}
// Request message for the DeletePipeIterator RPC.
message DeletePipeIteratorRequest {
  // ID of the pipe iterator to delete.
  string pipe_iterator_id = 1;
}
// Reply message for the DeletePipeIterator RPC.
message DeletePipeIteratorReply {
  // True if the pipe iterator was successfully removed from the pipe
  // iterator store.
  bool success = 1;
}
// Request message for the GetPipeIteratorConfigJsonSchema RPC.
message GetPipeIteratorConfigJsonSchemaRequest {
  // PF4J plugin ID of the pipe iterator whose config JSON schema should be
  // returned.
  string plugin_id = 1;
}
// Reply message for the GetPipeIteratorConfigJsonSchema RPC.
message GetPipeIteratorConfigJsonSchemaReply {
  // JSON schema describing the pipe iterator config, in string format.
  string pipe_iterator_config_json_schema = 1;
}
// Request message for the RunPipeJob RPC.
message RunPipeJobRequest {
  // ID of the pipe iterator to use for the pipe job. It must already be in
  // the pipe iterator store (previously saved by SavePipeIterator).
  string pipe_iterator_id = 1;

  // ID of the fetcher to use for the pipe job. It must already be in the
  // fetcher store (previously saved by SaveFetcher).
  string fetcher_id = 2;

  // ID of the emitter to use for the pipe job. It must already be in the
  // emitter store (previously saved by SaveEmitter).
  string emitter_id = 3;

  // Hard timeout of the job, in seconds.
  int32 job_completion_timeout_seconds = 4;
}
// Reply message for the RunPipeJob RPC.
message RunPipeJobReply {
  // ID of the pipe job that was started.
  string pipe_job_id = 1;
}
// Request message for the GetPipeJob RPC.
message GetPipeJobRequest {
  // ID of the pipe job whose state should be returned.
  string pipe_job_id = 1;
}
// Reply message for the GetPipeJob RPC, describing a pipe job's state.
message GetPipeJobReply {
  // ID of the pipe job being returned.
  string pipe_job_id = 1;

  // ID of the pipe iterator used in the pipe job.
  string pipe_iterator_id = 2;

  // ID of the fetcher used in the pipe job.
  string fetcher_id = 3;

  // ID of the emitter used in the pipe job.
  string emitter_id = 4;

  // NOTE(review): the three flags below are undocumented in the schema. The
  // descriptions are inferred from the field names only -- confirm against
  // the server implementation, including whether the flags can overlap.

  // Presumably true while the pipe job is still running.
  bool is_running = 5;

  // Presumably true once the pipe job has completed.
  bool is_completed = 6;

  // Presumably true if the pipe job encountered an error.
  bool has_error = 7;
}