Skip to content

Commit

Permalink
feat(file): add apache tika for file content extraction
Browse files (browse the repository at this point in the history)
  • Loading branch information
Yelinz committed May 31, 2024
1 parent a8a08ad commit 0557c59
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
13 changes: 13 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,20 @@ services:
# - POSTGRES_PASSWORD=
volumes:
- dbdata:/var/lib/postgresql/data

alexandria:
image: ghcr.io/projectcaluma/alexandria:dev
ports:
- "8000:8000"
depends_on:
- db
- minio
- tika
environment:
- DATABASE_HOST=db
- DATABASE_PORT=5432
- TIKA_CLIENT_ONLY=true
- TIKA_SERVER_ENDPOINT=http://tika:9998
# following options are a must to configure on production system:
# https://docs.djangoproject.com/en/2.1/ref/settings/#std:setting-SECRET_KEY
# - SECRET_KEY=
Expand All @@ -27,6 +31,7 @@ services:
# https://docs.djangoproject.com/en/2.1/ref/settings/#password
# same as postgres password above
# - DATABASE_PASSWORD=

dms:
image: ghcr.io/adfinis/document-merge-service:6.4.4
depends_on:
Expand All @@ -42,6 +47,7 @@ services:
- OIDC_BEARER_TOKEN_REVALIDATION_TIME=300
- SECRET_KEY=aaa
- DOCXTEMPLATE_JINJA_EXTENSIONS=

minio:
image: minio/minio:RELEASE.2023-11-06T22-26-08Z
volumes:
Expand All @@ -54,6 +60,7 @@ services:
- MINIO_ROOT_USER=very
- MINIO_ROOT_PASSWORD=secret
command: server data --console-address ":9090"

mc:
image: minio/mc:RELEASE.2023-11-06T04-19-23Z
restart: on-failure
Expand All @@ -68,6 +75,12 @@ services:
depends_on:
- minio

# Apache Tika server, added for file content extraction. The alexandria
# service depends on it and is pointed at it via
# TIKA_SERVER_ENDPOINT=http://tika:9998.
tika:
image: apache/tika:2.9.2.0-full
ports:
# Publishes container port 9998 on the same host port.
- "9998:9998"


volumes:
dbdata:
minio_data:
Expand Down
36 changes: 34 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ boto3 = "^1.29.7"
tqdm = "^4.66.1"
manabi = "^1.4.0"
python-magic = "^0.4.27"
tika = "^2.6.0"

[tool.poetry.group.dev.dependencies]
black = "24.4.2"
Expand Down

0 comments on commit 0557c59

Please sign in to comment.