Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚚 (#266): RESTify az-document-ai service #279

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions backend/app.hopps.az-document-ai/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@
<version>1.0.0-SNAPSHOT</version>
</dependency>
<!-- quarkus -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-messaging-kafka</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-arc</artifactId>
Expand Down Expand Up @@ -108,18 +104,13 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.smallrye.reactive</groupId>
<artifactId>smallrye-reactive-messaging-in-memory</artifactId>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-jacoco</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-jacoco</artifactId>
<groupId>io.rest-assured</groupId>
<artifactId>rest-assured</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.net.URL;
import java.time.Duration;
import java.util.List;
import java.util.Optional;

@ApplicationScoped
public class AzureAiService {
Expand All @@ -43,27 +44,31 @@ public AzureAiService(AzureDocumentConnector azureDocumentConnector, OidcClient
this.oidcClient = oidcClient;
}

public ReceiptData scanReceipt(DocumentData documentData) {
Document document = scanDocument(receiptModelId, documentData.internalFinUrl());
if (document == null) {
return null;
public Optional<ReceiptData> scanReceipt(DocumentData documentData) {
var document = scanDocument(receiptModelId, documentData.internalFinUrl());
if (document.isEmpty()) {
return Optional.empty();
}

return ReceiptDataHelper.fromDocument(documentData.referenceKey(), document);
LOG.info("Scanned receipt: {}", document.get().getFields());

ReceiptData receiptData = ReceiptDataHelper.fromDocument(documentData.referenceKey(), document.get());
return Optional.of(receiptData);
}

public InvoiceData scanInvoice(DocumentData documentData) {
Document document = scanDocument(invoiceModelId, documentData.internalFinUrl());
if (document == null) {
return null;
public Optional<InvoiceData> scanInvoice(DocumentData documentData) {
var document = scanDocument(invoiceModelId, documentData.internalFinUrl());
if (document.isEmpty()) {
return Optional.empty();
}

LOG.info("Scanned document: {}", document.getFields());
LOG.info("Scanned invoice: {}", document.get().getFields());

return InvoiceDataHelper.fromDocument(documentData.referenceKey(), document);
InvoiceData invoiceData = InvoiceDataHelper.fromDocument(documentData.referenceKey(), document.get());
return Optional.of(invoiceData);
}

private Document scanDocument(String modelId, URL documentUrl) {
private Optional<Document> scanDocument(String modelId, URL documentUrl) {
LOG.info("(model={}) Starting scan of document: '{}'", modelId, documentUrl);
byte[] documentBytes = fetchDocument(documentUrl);

Expand All @@ -72,14 +77,14 @@ private Document scanDocument(String modelId, URL documentUrl) {

if (documents.isEmpty()) {
LOG.error("Couldn't analyze document '{}'", documentUrl);
return null;
return Optional.empty();
} else if (documents.size() > 1) {
LOG.warn("Document analysis found {} documents, using first one", documents.size());
}

LOG.info("(model={}) Scan successfully completed for: '{}'", modelId, documentUrl);

return documents.getFirst();
return Optional.ofNullable(documents.getFirst());
}

private byte[] fetchDocument(URL documentUrl) {
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package app.hopps;

import app.hopps.commons.DocumentData;
import app.hopps.commons.DocumentType;
import app.hopps.commons.InvoiceData;
import app.hopps.commons.ReceiptData;
import app.hopps.model.ScanDocumentBody;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import jakarta.ws.rs.POST;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import org.eclipse.microprofile.openapi.annotations.Operation;
import org.eclipse.microprofile.openapi.annotations.media.Content;
import org.eclipse.microprofile.openapi.annotations.media.Schema;
import org.eclipse.microprofile.openapi.annotations.responses.APIResponse;

/**
 * REST resource exposing document scanning under {@code /document/scan}.
 *
 * <p>Each endpoint takes a {@link ScanDocumentBody} naming the document to scan, delegates the scan to
 * {@link AzureAiService} and returns the extracted data. Any failure to obtain data is reported as
 * HTTP 400 (Bad Request).
 */
@ApplicationScoped
@Path("/document/scan")
public class ScanDocumentResource {
    private final AzureAiService aiService;

    @Inject
    public ScanDocumentResource(AzureAiService aiService) {
        this.aiService = aiService;
    }

    /**
     * Scans the invoice located at the URL given in the request body.
     *
     * @param body request payload containing the document URL
     * @return the data extracted from the invoice
     * @throws WebApplicationException with status 400 when the body is missing, the URL is invalid or
     *         no data could be extracted
     */
    @POST
    @Path("/invoice")
    @Operation(summary = "Scans the invoice at this URL", description = "Uses Azure Document AI to scan an invoice")
    @APIResponse(responseCode = "200", description = "Data about this invoice", content = @Content(mediaType = MediaType.APPLICATION_JSON, schema = @Schema(implementation = InvoiceData.class)))
    @APIResponse(responseCode = "400", description = "Couldn't extract data / Invalid URL / other")
    public InvoiceData scanInvoice(ScanDocumentBody body) {
        DocumentData documentData = toDocumentData(body, DocumentType.INVOICE);
        return aiService.scanInvoice(documentData)
                .orElseThrow(ScanDocumentResource::extractionFailed);
    }

    /**
     * Scans the receipt located at the URL given in the request body.
     *
     * @param body request payload containing the document URL
     * @return the data extracted from the receipt
     * @throws WebApplicationException with status 400 when the body is missing, the URL is invalid or
     *         no data could be extracted
     */
    @POST
    @Path("/receipt")
    @Operation(summary = "Scans the receipt at this URL", description = "Uses Azure Document AI to scan a receipt")
    @APIResponse(responseCode = "200", description = "Data about this receipt", content = @Content(mediaType = MediaType.APPLICATION_JSON, schema = @Schema(implementation = ReceiptData.class)))
    @APIResponse(responseCode = "400", description = "Couldn't extract data / Invalid URL / other")
    public ReceiptData scanReceipt(ScanDocumentBody body) {
        DocumentData documentData = toDocumentData(body, DocumentType.RECEIPT);
        return aiService.scanReceipt(documentData)
                .orElseThrow(ScanDocumentResource::extractionFailed);
    }

    /** Builds the scan request for the given body, rejecting a missing body with HTTP 400. */
    private static DocumentData toDocumentData(ScanDocumentBody body, DocumentType type) {
        // Without this guard a request with no payload would fail with a NullPointerException (HTTP 500).
        if (body == null) {
            throw new WebApplicationException("Missing request body", Response.Status.BAD_REQUEST);
        }
        // -1L is a placeholder for the second DocumentData component (presumably the reference key,
        // which a direct REST call does not have -- confirm against DocumentData).
        return new DocumentData(body.parseDocumentUrl(), -1L, type);
    }

    /** Creates the HTTP 400 error returned when the scan produced no data. */
    private static WebApplicationException extractionFailed() {
        return new WebApplicationException("Could not extract document", Response.Status.BAD_REQUEST);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package app.hopps.model;

import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.Response;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

/**
 * Request payload naming the document that should be scanned.
 *
 * @param documentUrl textual form of the document's URL; must be an absolute URL to be accepted by
 *        {@link #parseDocumentUrl()}
 */
public record ScanDocumentBody(String documentUrl) {
    /**
     * Parses {@link #documentUrl()} into a {@link URL}.
     *
     * @return the parsed URL
     * @throws WebApplicationException with status 400 (Bad Request) when the value is missing, is not a
     *         syntactically valid URI, is not absolute, or cannot be converted to a URL
     */
    public URL parseDocumentUrl() throws WebApplicationException {
        // new URI(null) would throw a NullPointerException; report a missing field as a client error.
        if (documentUrl == null) {
            throw new WebApplicationException("Invalid document URL", Response.Status.BAD_REQUEST);
        }
        try {
            return new URI(documentUrl).toURL();
        } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) {
            // URI.toURL() throws IllegalArgumentException for non-absolute URIs (e.g. "foo/bar" or "").
            throw new WebApplicationException("Invalid document URL", e, Response.Status.BAD_REQUEST);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,8 @@
# Quarkus
#######################################
%dev.quarkus.http.port=8100
quarkus.http.root-path=/api/az-document-ai
%prod.quarkus.management.enabled=true
#
########################################
# Kafka
########################################
%dev.quarkus.kafka.devservices.port=32782
#
## Incoming
mp.messaging.incoming.documents-in.connector=smallrye-kafka
mp.messaging.incoming.documents-in.topic=app.hopps.documents.images
#
## Outgoing
### Invoices
mp.messaging.outgoing.invoices-out.connector=smallrye-kafka
mp.messaging.outgoing.invoices-out.topic=app.hopps.documents.invoice-data
mp.messaging.outgoing.invoices-out.cloud-events=true
mp.messaging.outgoing.invoices-out.cloud-events-mode=binary
mp.messaging.outgoing.invoices-out.cloud-events-source=invoices
mp.messaging.outgoing.invoices-out.cloud-events-type=app.hopps.commons.InvoiceData
### Receipts
mp.messaging.outgoing.receipts-out.connector=smallrye-kafka
mp.messaging.outgoing.receipts-out.topic=app.hopps.documents.receipt-data
mp.messaging.outgoing.receipts-out.cloud-events=true
mp.messaging.outgoing.receipts-out.cloud-events-mode=binary
mp.messaging.outgoing.receipts-out.cloud-events-source=receipts
mp.messaging.outgoing.receipts-out.cloud-events-type=app.hopps.commons.ReceiptData
#
########################################
# Azure
########################################
Expand All @@ -42,4 +17,4 @@ app.hopps.az-document-ai.azure.receiptModelId=prebuilt-receipt
# OIDC-Client
########################################
%dev.quarkus.keycloak.devservices.port=8554
quarkus.oidc-client.grant.type=client
quarkus.oidc-client.grant.type=client
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Optional;

import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.when;
Expand Down Expand Up @@ -51,9 +53,10 @@ void shouldAnalyzeInvoiceAgainstMock() throws Exception {
DocumentData documentData = new DocumentData(new URI(url).toURL(), -1L, DocumentType.INVOICE);

// when
InvoiceData invoiceData = aiService.scanInvoice(documentData);
Optional<InvoiceData> invoiceData = aiService.scanInvoice(documentData);

// then
assertNotNull(invoiceData);
assertTrue(invoiceData.isPresent());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
import org.junit.jupiter.api.Test;

import java.net.URI;
import java.util.Optional;

import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

@QuarkusTest
class AzureAiServiceTest {
Expand All @@ -27,9 +29,10 @@ void shouldAnalyzeInvoiceAgainstAzure() throws Exception {
DocumentData documentData = new DocumentData(new URI(url).toURL(), -1L, DocumentType.INVOICE);

// when
InvoiceData invoiceData = aiService.scanInvoice(documentData);
Optional<InvoiceData> invoiceData = aiService.scanInvoice(documentData);

// then
assertNotNull(invoiceData);
assertTrue(invoiceData.isPresent());
}
}
Loading
Loading