Skip to content

Commit

Permalink
Merge branch 'otiai10:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
joelrose authored Feb 25, 2024
2 parents 8f56aef + d85513d commit 92b9981
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 39 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/go-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ jobs:
macos:
strategy:
matrix:
os: [macos-latest, macos-12]
go: ['1.17', '1.18', '1.19']
os: [macos-latest, macos-13]
go: ['1.19', '1.20', '1.21']
runs-on: ${{ matrix.os }}
name: macOS Go${{ matrix.go }} on ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Go
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go }}
- name: Install tesseract
Expand All @@ -36,9 +36,9 @@ jobs:
needs: macos
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Go
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: '1.18'
- name: Install packages
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/runtime-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
name: Docker ${{ matrix.runtime }}
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: ${{ matrix.runtime }}
shell: 'script -q -e -c "bash {0}"'
run: bash ./test/runtime --driver docker --build --run ${{ matrix.runtime }}
Expand All @@ -36,7 +36,7 @@ jobs:
name: Test Dockerfile on the repo root
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Build
run: docker build . -t otiai10/gosseract
- name: Test Run
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/runtime-vagrant.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ jobs:
name: Vagrant ${{ matrix.runtime }}
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: ${{ matrix.runtime }}
run: bash ./test/runtime --driver vagrant --build --run ${{ matrix.runtime }}
4 changes: 2 additions & 2 deletions .github/workflows/windows-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
name: Windows Test
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: 1.18
- name: Install Tesseract
Expand Down
6 changes: 6 additions & 0 deletions all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ func TestNewClient(t *testing.T) {
Expect(t, client).TypeOf("*gosseract.Client")
}

// TestDoubleClose verifies that calling Close twice on the same client is
// safe: the second call must be a no-op instead of freeing the API again.
func TestDoubleClose(t *testing.T) {
	client := NewClient()
	if err := client.Close(); err != nil {
		t.Fatalf("first Close returned an error: %v", err)
	}
	// The API handle is already released here, so Close must simply return nil.
	if err := client.Close(); err != nil {
		t.Fatalf("second Close returned an error: %v", err)
	}
}

func TestClient_SetTessdataPrefix(t *testing.T) {
client := NewClient()
defer client.Close()
Expand Down
79 changes: 57 additions & 22 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,16 @@ import (
"image"
"os"
"path/filepath"
"runtime"
"strings"
"unsafe"
)

// ErrClientNotConstructed is the error reported by Client methods when the
// client holds no TessBaseAPI handle (its api field is nil).
var ErrClientNotConstructed = fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")

// Version returns the version of Tesseract-OCR
func Version() string {
api := C.Create()
Expand Down Expand Up @@ -72,18 +78,29 @@ func NewClient() *Client {
shouldInit: true,
Languages: []string{"eng"},
}
// set a finalizer to close the client when it's unused and not closed by the user
runtime.SetFinalizer(client, (*Client).Close)
return client
}

// Close frees allocated API. This MUST be called for ANY client constructed by "NewClient" function.
func (client *Client) Close() (err error) {
// no need for a finalizer anymore
runtime.SetFinalizer(client, nil)
if client.api == nil {
// already closed or not constructed
return nil
}
// defer func() {
// if e := recover(); e != nil {
// err = fmt.Errorf("%v", e)
// }
// }()

C.Clear(client.api)
C.Free(client.api)
client.api = nil

if client.pixImage != nil {
C.DestroyPixImage(client.pixImage)
client.pixImage = nil
Expand All @@ -93,6 +110,9 @@ func (client *Client) Close() (err error) {

// Version reports the version string of the Tesseract library used by this
// client. It returns an empty string when the client has no API handle.
func (client *Client) Version() string {
	if client.api != nil {
		return C.GoString(C.Version(client.api))
	}
	return ""
}
Expand All @@ -101,7 +121,7 @@ func (client *Client) Version() string {
func (client *Client) SetImage(imagepath string) error {

if client.api == nil {
return fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")
return ErrClientNotConstructed
}
if imagepath == "" {
return fmt.Errorf("image path cannot be empty")
Expand All @@ -119,6 +139,10 @@ func (client *Client) SetImage(imagepath string) error {
defer C.free(unsafe.Pointer(p))

img := C.CreatePixImageByFilePath(p)
if img == nil {
return fmt.Errorf("failed to create PixImage from file path: %s", imagepath)
}

client.pixImage = img

return nil
Expand All @@ -128,7 +152,7 @@ func (client *Client) SetImage(imagepath string) error {
func (client *Client) SetImageFromBytes(data []byte) error {

if client.api == nil {
return fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")
return ErrClientNotConstructed
}
if len(data) == 0 {
return fmt.Errorf("image data cannot be empty")
Expand All @@ -140,13 +164,20 @@ func (client *Client) SetImageFromBytes(data []byte) error {
}

img := C.CreatePixImageFromBytes((*C.uchar)(unsafe.Pointer(&data[0])), C.int(len(data)))
if img == nil {
return fmt.Errorf("failed to create PixImage from bytes: %d", len(data))
}

client.pixImage = img

return nil
}

// SetLanguage sets languages to use. English as default.
func (client *Client) SetLanguage(langs ...string) error {
if client.api == nil {
return ErrClientNotConstructed
}
if len(langs) == 0 {
return fmt.Errorf("languages cannot be empty")
}
Expand All @@ -160,55 +191,50 @@ func (client *Client) SetLanguage(langs ...string) error {

// DisableOutput sets the DEBUG_FILE variable to os.DevNull.
func (client *Client) DisableOutput() error {
err := client.SetVariable(DEBUG_FILE, os.DevNull)

client.setVariablesToInitializedAPIIfNeeded()

return err
return client.SetVariable(DEBUG_FILE, os.DevNull)
}

// SetWhitelist sets whitelist chars.
// See official documentation for whitelist here https://tesseract-ocr.github.io/tessdoc/ImproveQuality#dictionaries-word-lists-and-patterns
func (client *Client) SetWhitelist(whitelist string) error {
err := client.SetVariable(TESSEDIT_CHAR_WHITELIST, whitelist)

client.setVariablesToInitializedAPIIfNeeded()

return err
return client.SetVariable(TESSEDIT_CHAR_WHITELIST, whitelist)
}

// SetBlacklist sets blacklist chars.
// See official documentation for blacklist here https://tesseract-ocr.github.io/tessdoc/ImproveQuality#dictionaries-word-lists-and-patterns
func (client *Client) SetBlacklist(blacklist string) error {
err := client.SetVariable(TESSEDIT_CHAR_BLACKLIST, blacklist)

client.setVariablesToInitializedAPIIfNeeded()

return err
return client.SetVariable(TESSEDIT_CHAR_BLACKLIST, blacklist)
}

// SetVariable sets parameters, representing tesseract::TessBaseAPI->SetVariable.
// See official documentation here https://zdenop.github.io/tesseract-doc/classtesseract_1_1_tess_base_a_p_i.html#a2e09259c558c6d8e0f7e523cbaf5adf5
// Because `api->SetVariable` must be called after `api->Init`, this method cannot detect unexpected key for variables.
// Check `client.setVariablesToInitializedAPI` for more information.
func (client *Client) SetVariable(key SettableVariable, value string) error {
if client.api == nil {
return ErrClientNotConstructed
}
client.Variables[key] = value

client.setVariablesToInitializedAPIIfNeeded()

return nil
return client.setVariablesToInitializedAPIIfNeeded()
}

// SetPageSegMode configures the "Page Segmentation Mode" (PSM) used to detect the layout of characters.
// The official PSM documentation is at https://tesseract-ocr.github.io/tessdoc/ImproveQuality#page-segmentation-method
// Further background: https://github.com/otiai10/gosseract/issues/52
// ErrClientNotConstructed is returned when the client has no API handle.
func (client *Client) SetPageSegMode(mode PageSegMode) error {
	if client.api != nil {
		C.SetPageSegMode(client.api, C.int(mode))
		return nil
	}
	return ErrClientNotConstructed
}

// SetConfigFile sets the file path to config file.
func (client *Client) SetConfigFile(fpath string) error {
if client.api == nil {
return ErrClientNotConstructed
}
info, err := os.Stat(fpath)
if err != nil {
return err
Expand All @@ -226,6 +252,9 @@ func (client *Client) SetConfigFile(fpath string) error {
// SetTessdataPrefix sets path to the models directory.
// Environment variable TESSDATA_PREFIX is used as default.
func (client *Client) SetTessdataPrefix(prefix string) error {
if client.api == nil {
return ErrClientNotConstructed
}
if prefix == "" {
return fmt.Errorf("tessdata prefix could not be empty")
}
Expand Down Expand Up @@ -349,6 +378,9 @@ func (client *Client) setVariablesToInitializedAPIIfNeeded() error {

// Text finally initializes tesseract::TessBaseAPI, executes OCR and extracts the detected text as a string.
func (client *Client) Text() (out string, err error) {
if client.api == nil {
return out, ErrClientNotConstructed
}
if err = client.init(); err != nil {
return
}
Expand All @@ -362,6 +394,9 @@ func (client *Client) Text() (out string, err error) {
// HOCRText finally initializes tesseract::TessBaseAPI, executes OCR and returns hOCR text.
// See https://en.wikipedia.org/wiki/HOCR for more information of hOCR.
func (client *Client) HOCRText() (out string, err error) {
if client.api == nil {
return out, ErrClientNotConstructed
}
if err = client.init(); err != nil {
return
}
Expand Down Expand Up @@ -398,7 +433,7 @@ type BoundingBox struct {
// GetBoundingBoxes returns bounding boxes for each matched word
func (client *Client) GetBoundingBoxes(level PageIteratorLevel) (out []BoundingBox, err error) {
if client.api == nil {
return out, fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")
return out, ErrClientNotConstructed
}
if err = client.init(); err != nil {
return
Expand Down Expand Up @@ -439,7 +474,7 @@ func GetAvailableLanguages() ([]string, error) {
// according to the c++ api that returns a formatted TSV output. Reference: `TessBaseAPI::GetTSVText`.
func (client *Client) GetBoundingBoxesVerbose() (out []BoundingBox, err error) {
if client.api == nil {
return out, fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")
return out, ErrClientNotConstructed
}
if err = client.init(); err != nil {
return
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ module github.com/joelrose/gosseract

go 1.18

require github.com/otiai10/mint v1.4.1
require github.com/otiai10/mint v1.6.3
5 changes: 2 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
github.com/otiai10/curr v1.0.0 h1:TJIWdbX0B+kpNagQrjgq8bCMrbhiuX73M2XwgtDMoOI=
github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y=
github.com/otiai10/mint v1.4.1/go.mod h1:gifjb2MYOoULtKLqUAEILUG/9KONW6f7YsJ6vQLTlFI=
github.com/otiai10/mint v1.6.3 h1:87qsV/aw1F5as1eH1zS/yqHY85ANKVMgkDrf9rcxbQs=
github.com/otiai10/mint v1.6.3/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
7 changes: 7 additions & 0 deletions preprocessflags_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package gosseract

// #cgo CXXFLAGS: -std=c++0x
// #cgo CPPFLAGS: -I/opt/homebrew/include -I/usr/local/include
// #cgo CPPFLAGS: -Wno-unused-result
// #cgo LDFLAGS: -L/opt/homebrew/lib -L/usr/local/lib -lleptonica -ltesseract
import "C"
5 changes: 3 additions & 2 deletions preprocessflags_x.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
//go:build !darwin

package gosseract

// #cgo CXXFLAGS: -std=c++0x
// #cgo CPPFLAGS: -I/usr/local/include
// #cgo CPPFLAGS: -Wno-unused-result
// #cgo darwin LDFLAGS: -L/usr/local/lib -llept -ltesseract
// #cgo !darwin LDFLAGS: -L/usr/local/lib -lleptonica -ltesseract
// #cgo LDFLAGS: -L/usr/local/lib -lleptonica -ltesseract
import "C"

0 comments on commit 92b9981

Please sign in to comment.