Skip to content

Commit

Permalink
Merge pull request #7 from nenadjakic/feature/delete_tasks_ex_handler…
Browse files Browse the repository at this point in the history
…_refactoring_improvements

Feature/delete tasks ex handler refactoring improvements
  • Loading branch information
nenadjakic authored Sep 18, 2024
2 parents a87a5a4 + 58bc0d0 commit c1e5581
Show file tree
Hide file tree
Showing 22 changed files with 301 additions and 119 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.github.nenadjakic.ocr.studio.config

import com.github.nenadjakic.ocr.studio.entity.Status

/**
 * Catalogue of user-facing error messages.
 *
 * @property description human-readable text of the message.
 */
enum class MessageConst(val description: String) {
    /** The operation is only allowed while the task is still in [Status.CREATED]. */
    ILLEGAL_STATUS(
        "Cannot remove file for task, because status is different than ${Status.CREATED}."
    ),

    /** No task exists for the supplied identifier. */
    MISSING_DOCUMENT(
        "Cannot find task with specified id."
    )
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import com.github.nenadjakic.ocr.studio.dto.*
import com.github.nenadjakic.ocr.studio.entity.OcrConfig
import com.github.nenadjakic.ocr.studio.entity.SchedulerConfig
import com.github.nenadjakic.ocr.studio.entity.Task
import com.github.nenadjakic.ocr.studio.exception.IllegalStateOcrException
import com.github.nenadjakic.ocr.studio.exception.MissingDocumentOcrException
import com.github.nenadjakic.ocr.studio.extension.collectionMap
import com.github.nenadjakic.ocr.studio.service.TaskService
import io.swagger.v3.oas.annotations.Operation
Expand Down Expand Up @@ -88,9 +90,7 @@ open class TaskController(
)
@PostMapping(consumes = [MediaType.MULTIPART_FORM_DATA_VALUE])
fun create(
@Valid @RequestPart(name = "model")
//@Schema(implementation = TaskAddRequest::class)
model: TaskAddRequest,
@Valid @RequestPart(name = "model") model: TaskAddRequest,
@RequestPart(value = "files", required = false) files: Collection<MultipartFile>?
): ResponseEntity<Void> {
val task = modelMapper.map(model, Task::class.java)
Expand Down Expand Up @@ -184,6 +184,45 @@ open class TaskController(
@RequestPart("files") multipartFiles: Collection<MultipartFile>
): ResponseEntity<List<UploadDocumentResponse>> = ResponseEntity.ok(modelMapper.collectionMap(taskService.upload(id, multipartFiles), UploadDocumentResponse::class.java))

/**
 * Removes a single file from a task, or every file of the task when no file name is supplied.
 *
 * @param id identifier of the task whose file(s) should be removed.
 * @param originalFileName original name of the file to remove; when the request parameter is
 * absent (`null`) or empty, all files of the task are removed instead.
 * @return an empty `204 No Content` response.
 * @throws MissingDocumentOcrException declared by this endpoint; presumably raised by the
 * service when the task cannot be found (verify against `TaskService`).
 */
@Operation(
    operationId = "removeFile",
    summary = "Remove file or all files and document from task.",
    description = "Remove file or all files (in case that param originalFileName is not give) and document from task."
)
@ApiResponses(
    value = [
        ApiResponse(responseCode = "204", description = "File removed from file system successfully. Also document task updated successfully."),
        ApiResponse(responseCode = "400", description = "Invalid request data.")
    ]
)
@DeleteMapping("/file/{id}")
@Throws(MissingDocumentOcrException::class)
fun removeFile(@PathVariable id: UUID, @RequestParam(required = false) originalFileName: String?): ResponseEntity<Void> {
    // The parameter is optional, so it must be nullable: with a non-null Kotlin type an omitted
    // query parameter cannot be bound and the "remove all files" branch is never reached.
    if (originalFileName.isNullOrEmpty()) {
        taskService.removeAllFiles(id)
    } else {
        // isNullOrEmpty() == false smart-casts originalFileName to String here.
        taskService.removeFile(id, originalFileName)
    }
    return ResponseEntity.noContent().build()
}

/**
 * Deletes the task with the given id by delegating to `taskService.deleteById`.
 *
 * @param id identifier of the task to delete.
 * @return an empty `204 No Content` response, matching the documented API response.
 * @throws MissingDocumentOcrException declared by this endpoint; presumably raised when the
 * task cannot be found (verify against `TaskService`).
 * @throws IllegalStateOcrException declared by this endpoint; presumably raised when the task
 * status does not allow deletion (verify against `TaskService`).
 */
@Operation(
    operationId = "deleteTask",
    summary = "Delete task and remove all files.",
    description = "Delete task and remove all files."
)
@ApiResponses(
    value = [
        ApiResponse(responseCode = "204", description = "Task deleted and all files removed from file system successfully."),
        ApiResponse(responseCode = "400", description = "Invalid request data.")
    ]
)
@DeleteMapping("/{id}")
@Throws(MissingDocumentOcrException::class, IllegalStateOcrException::class)
fun deleteById(@PathVariable id: UUID): ResponseEntity<Void> {
    taskService.deleteById(id)
    // A handler returning Unit answers 200 OK by default; return 204 explicitly so the actual
    // status agrees with the documented one and with removeFile.
    return ResponseEntity.noContent().build()
}

private fun insert(task: Task, files: Collection<MultipartFile>? = null): ResponseEntity<Void> {
val createdTask = taskService.insert(task, files)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class OcrConfigRequest {
lateinit var ocrEngineMode: OcrConfig.OcrEngineMode
lateinit var pageSegmentationMode: OcrConfig.PageSegmentationMode
lateinit var language: String
var tessVariables: Map<String, String>? = null
var preProcessing: Boolean = false
lateinit var fileFormat: FileFormat
var mergeDocuments: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package com.github.nenadjakic.ocr.studio.entity

class Document {
lateinit var originalFileName: String
lateinit var randomizedFileName: String
class Document(
val originalFileName: String,
val randomizedFileName: String
) {
var type: String? = null
var outDocument: OutDocument? = null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package com.github.nenadjakic.ocr.studio.entity

/**
 * Mutable list of [Document]s that additionally carries the name of the merged output file.
 *
 * NOTE(review): this type is used for a persisted entity property; confirm that the mapping
 * layer actually stores [mergedDocumentName], since it lives on a collection subclass.
 */
class DocumentMutableList : ArrayList<Document>() {
// Name of the merged output document; stays null until a merge assigns it.
var mergedDocumentName: String? = null
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@ package com.github.nenadjakic.ocr.studio.entity

import net.sourceforge.tess4j.ITesseract.RenderedFormat

class OcrConfig {
class OcrConfig(
var language: String = "eng",
var ocrEngineMode: OcrEngineMode = OcrEngineMode.DEFAULT,
var pageSegmentationMode: PageSegmentationMode = PageSegmentationMode.MODE_3,
var tessVariables: Map<String, String>? = null,
var preProcessing: Boolean = false,
var fileFormat: FileFormat = FileFormat.TEXT,
var mergeDocuments: Boolean = false
) {
enum class OcrEngineMode(val tesseractValue: Int, val descritpion: String) {
LEGACY(0, "Legacy engine only."),
LSTM(1, "Neural nets LSTM engine only."),
Expand Down Expand Up @@ -41,12 +49,4 @@ class OcrConfig {
TEXT -> "txt"
}
}

var language: String = "eng"
var ocrEngineMode: OcrEngineMode = OcrEngineMode.DEFAULT
var pageSegmentationMode: PageSegmentationMode = PageSegmentationMode.MODE_3
var preProcessing: Boolean = false
var fileFormat: FileFormat = FileFormat.TEXT
var mergeDocuments: Boolean = false

}
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
package com.github.nenadjakic.ocr.studio.entity

class OcrProgress() {
constructor(status: Status, progress: String, description: String?) : this() {
this.status = status
this.progress = progress
this.description = description
}

var status: Status = Status.CREATED
var progress: String = "N/A"
class OcrProgress(
var status: Status = Status.CREATED,
var progress: String = "N/A",
var description: String? = null
}
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ package com.github.nenadjakic.ocr.studio.entity

import java.time.ZonedDateTime

class SchedulerConfig {
class SchedulerConfig(
var startDateTime: ZonedDateTime? = null
}
)
36 changes: 4 additions & 32 deletions src/main/kotlin/com/github/nenadjakic/ocr/studio/entity/Task.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,19 @@ import org.springframework.data.mongodb.core.mapping.Field
import java.util.*

@Document(collection = "ocr_collection")
class Task : Auditable<UUID>() {

@Id
@Field(name = "_id")
var id: UUID? = null
class Task(
@Id @Field(name = "_id") var id: UUID? = null
) : Auditable<UUID>() {

lateinit var name: String
var ocrConfig: OcrConfig = OcrConfig()
var schedulerConfig: SchedulerConfig = SchedulerConfig()
var ocrProgress: OcrProgress = OcrProgress()
var inDocuments: MutableCollection<com.github.nenadjakic.ocr.studio.entity.Document> = mutableListOf()
/**
 * Input documents of the task.
 *
 * Assignment copies the elements of the new value into the existing list instance instead of
 * swapping the reference, so the list object held by the task never changes.
 *
 * NOTE(review): `mergedDocumentName` of the assigned value is not copied over; confirm whether
 * that is intended.
 */
var inDocuments: DocumentMutableList = DocumentMutableList()
    set(value) {
        // Assigning the list to itself must be a no-op: clearing first would otherwise empty
        // `value` as well and every document would be lost.
        if (value === field) return
        field.clear()
        field.addAll(value)
    }

fun addInDocument(document: com.github.nenadjakic.ocr.studio.entity.Document): Boolean = inDocuments.add(document)

override fun equals(other: Any?): Boolean {
if (this === other) return true
if (javaClass != other?.javaClass) return false

other as Task

if (id != other.id) return false
if (name != other.name) return false
if (ocrConfig != other.ocrConfig) return false
if (schedulerConfig != other.schedulerConfig) return false
if (ocrProgress != other.ocrProgress) return false
if (inDocuments != other.inDocuments) return false

return true
}

override fun hashCode(): Int {
var result = id?.hashCode() ?: 0
result = 31 * result + name.hashCode()
result = 31 * result + ocrConfig.hashCode()
result = 31 * result + schedulerConfig.hashCode()
result = 31 * result + ocrProgress.hashCode()
result = 31 * result + inDocuments.hashCode()
return result
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
package com.github.nenadjakic.ocr.studio.exception

class OcrException(message: String) : Exception(message)
/** Base checked-style exception of the application; `open` so more specific exceptions can extend it. */
open class OcrException(message: String) : Exception(message)

/** [OcrException] subtype; by its name, signals that an operation is not allowed in the current state. */
class IllegalStateOcrException(message: String): OcrException(message)

/** [OcrException] subtype; by its name, signals that a requested document/task could not be found. */
class MissingDocumentOcrException(message: String) : OcrException(message)
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,9 @@ class OcrExecutor(
try {
progressInfo.description = "Starting ocr of documents..."
for (document in task.inDocuments.sortedBy { it.originalFileName }) {
val inFile =
Path.of(ocrProperties.taskPath, task.id.toString(), "input", document.randomizedFileName).toFile()
val inFile = TaskFileSystemService.getInputFile(ocrProperties.taskPath, task.id!!, document.randomizedFileName)
if (inFile.exists()) {
val outFile =
Path.of(ocrProperties.taskPath, task.id.toString(), "output", UUID.randomUUID().toString())
.toFile()
val outFile = TaskFileSystemService.getOutputFile(ocrProperties.taskPath, task.id!!, UUID.randomUUID().toString())

document.outDocument = OutDocument()
document.outDocument!!.outputFileName = outFile.name
Expand Down Expand Up @@ -97,9 +94,10 @@ class OcrExecutor(
}
if (task.ocrConfig.mergeDocuments) {
progressInfo.description = "Starting merging of documents..."
val mergedFile =
Path.of(ocrProperties.taskPath, task.id.toString(), "output", "merged_" + UUID.randomUUID() + "." + task.ocrConfig.fileFormat.getExtension())
.toFile()
val mergedFileName = "merged_" + UUID.randomUUID() + "." + task.ocrConfig.fileFormat.getExtension()
task.inDocuments.mergedDocumentName = mergedFileName

val mergedFile = TaskFileSystemService.getOutputFile(ocrProperties.taskPath, task.id!!, mergedFileName)

when (task.ocrConfig.fileFormat) {
OcrConfig.FileFormat.TEXT -> {
Expand Down Expand Up @@ -144,9 +142,9 @@ class OcrExecutor(
val saxHandler = HocrSaxHandler()

BufferedWriter(FileWriter(mergedFile)).use { writer ->
//writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
//writer.newLine()
// writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">")
writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
writer.newLine()
writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">")
writer.write("<html>")
writer.newLine()
for ((index, document) in task.inDocuments.sortedBy { it.originalFileName }.withIndex()) {
Expand Down Expand Up @@ -183,11 +181,6 @@ class OcrExecutor(
}
}

private data class InputData (
val fileFormat: OcrConfig.FileFormat,
val file: File
)

@Throws(IOException::class)
private fun preProcessDocument(preProcess: Boolean, inFile: File): Map<Long, File> {
val files = mutableMapOf<Long, File>()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,13 @@ interface ParallelizationManager {
fun interruptAll(): Map<UUID, Boolean?>

fun getProgress(id: UUID): ProgressInfo?

/** Drops the bookkeeping of executions that have finished. */
fun clearFinished()

/** Drops the bookkeeping of executions that were interrupted (cancelled). */
fun clearInterrupted()

/** Convenience default: runs [clearInterrupted] followed by [clearFinished]. */
fun clear() {
clearInterrupted()
clearFinished()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,34 @@ class ParallelizationManagerImpl(
val runnable = runnables[id]
return runnable?.progressInfo
}

/**
 * Removes every entry of `futures` whose future reports `isDone`, then removes the
 * entries with the same ids from `runnables`.
 */
override fun clearFinished() {
    val finishedIds = mutableListOf<UUID>()

    futures.entries.removeIf { candidate ->
        val done = candidate.value.isDone
        // Remember the id so the matching runnable can be dropped afterwards.
        if (done) {
            finishedIds.add(candidate.key)
        }
        done
    }

    runnables.entries.removeIf { it.key in finishedIds }
}

/**
 * Removes every entry of `futures` whose future reports `isCancelled`, then removes the
 * entries with the same ids from `runnables`.
 */
override fun clearInterrupted() {
    val cancelledIds = mutableListOf<UUID>()

    futures.entries.removeIf { candidate ->
        val cancelled = candidate.value.isCancelled
        // Remember the id so the matching runnable can be dropped afterwards.
        if (cancelled) {
            cancelledIds.add(candidate.key)
        }
        cancelled
    }

    runnables.entries.removeIf { it.key in cancelledIds }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import java.util.function.Consumer

/** Maps [source] to an instance of [type] using the supplied [modelMapper]. */
private fun <S, T> map(modelMapper: ModelMapper, source: S, type: Class<T>?): T {
    return modelMapper.map(source, type)
}


fun <S, T> ModelMapper.collectionMap(source: List<S>?, type: Class<T>?): List<T> {
val result: MutableList<T> = ArrayList()
source?.forEach(Consumer { result.add(map(this, it, type)) })
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ class HocrSaxHandler: DefaultHandler() {

override fun characters(ch: CharArray?, start: Int, length: Int) {
if (insideElement == InsideElement.BODY) {
bodyBuilder.append(ch, start, length)
bodyBuilder.appendRange(ch!!, start, start + length)
} else if (insideElement == InsideElement.HEAD) {
headBuilder.append(ch, start, length)
headBuilder.appendRange(ch!!, start, start + length)
}
}

Expand Down
Loading

0 comments on commit c1e5581

Please sign in to comment.