Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add equality functions for token features or POSs #273

Merged
merged 1 commit into from
Jun 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions tokenizer/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,29 @@ func (t Token) POS() []string {
return nil
}

// EqualFeatures reports whether t and tt carry identical feature lists.
// The lists returned by Features() are compared element by element with the
// package-level EqualFeatures function.
func (t Token) EqualFeatures(tt Token) bool {
	mine, theirs := t.Features(), tt.Features()
	return EqualFeatures(mine, theirs)
}

// EqualPOS reports whether t and tt carry identical POS lists.
// The lists returned by POS() are compared element by element with the
// package-level EqualFeatures function.
func (t Token) EqualPOS(tt Token) bool {
	mine, theirs := t.POS(), tt.POS()
	return EqualFeatures(mine, theirs)
}

// EqualFeatures reports whether lhs and rhs contain the same strings in the
// same order. Two slices of length zero (nil or empty) are considered equal.
func EqualFeatures(lhs, rhs []string) bool {
	// Slices of different lengths can never match; this also guarantees that
	// indexing rhs below stays in range.
	if len(lhs) != len(rhs) {
		return false
	}
	for i, feature := range lhs {
		if rhs[i] != feature {
			return false
		}
	}
	return true
}

// InflectionalType returns the inflectional type feature if exists.
func (t Token) InflectionalType() (string, bool) {
return t.pickupFromFeatures(dict.InflectionalType)
Expand Down
173 changes: 173 additions & 0 deletions tokenizer/token_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -657,3 +657,176 @@ func TestEqual(t *testing.T) {
}
}
}

// Test_EqualFeatures checks both the package-level EqualFeatures function and
// the Token.EqualFeatures method against tokens produced from two sentences
// that differ only in their first word.
func Test_EqualFeatures(t *testing.T) {
	d, err := dict.LoadDictFile(testDictPath)
	if err != nil {
		t.Fatalf("unexpected error, %v", err)
	}
	tnz, err := New(d)
	if err != nil {
		t.Fatalf("unexpected error, %v", err)
	}

	tokens1 := tnz.Tokenize("公園に行くトトロ") // BOS/公園/に/行く/トトロ/EOS
	if want, got := 6, len(tokens1); want != got {
		t.Fatalf("token length: want %d, got %d", want, got)
	}
	tokens2 := tnz.Tokenize("学校に行くトトロ") // BOS/学校/に/行く/トトロ/EOS
	if want, got := 6, len(tokens2); want != got {
		t.Fatalf("token length: want %d, got %d", want, got)
	}

	testdata := []struct {
		name     string
		lhs, rhs Token
		want     bool
	}{
		{
			name: "BOS vs BOS",
			lhs:  tokens1[0],
			rhs:  tokens2[0],
			want: true,
		},
		{
			name: "公園 vs 学校",
			lhs:  tokens1[1],
			rhs:  tokens2[1],
			want: false,
		},
		{
			name: "に vs に",
			lhs:  tokens1[2],
			rhs:  tokens2[2],
			want: true,
		},
		{
			name: "行く vs 行く",
			lhs:  tokens1[3],
			rhs:  tokens2[3],
			want: true,
		},
		{
			name: "トトロ vs トトロ",
			lhs:  tokens1[4],
			rhs:  tokens2[4],
			want: true,
		},
		{
			name: "EOS vs EOS",
			lhs:  tokens1[5],
			rhs:  tokens2[5],
			want: true,
		},
		{
			name: "BOS vs EOS",
			lhs:  tokens1[0],
			rhs:  tokens2[5],
			want: true,
		},
		{
			// 学校 is the second token of the second sentence. The case
			// previously used tokens1[0] (BOS), so it did not compare the
			// tokens named here.
			name: "学校 vs トトロ",
			lhs:  tokens2[1],
			rhs:  tokens2[4],
			want: false,
		},
	}
	for _, tt := range testdata {
		t.Run(tt.name, func(t *testing.T) {
			// The function form and the method form must agree; the message
			// prefix tells which one failed.
			if got := EqualFeatures(tt.lhs.Features(), tt.rhs.Features()); tt.want != got {
				t.Errorf("EqualFeatures(): want %t, got %t, %q%+v vs %q%+v", tt.want, got, tt.lhs.Surface, tt.lhs.Features(), tt.rhs.Surface, tt.rhs.Features())
			}
			if got := tt.lhs.EqualFeatures(tt.rhs); tt.want != got {
				t.Errorf("Token.EqualFeatures(): want %t, got %t, %q%+v vs %q%+v", tt.want, got, tt.lhs.Surface, tt.lhs.Features(), tt.rhs.Surface, tt.rhs.Features())
			}
		})
	}
}

// Test_EqualPOS checks Token.EqualPOS against tokens produced from two
// sentences that differ only in their first word.
func Test_EqualPOS(t *testing.T) {
	d, err := dict.LoadDictFile(testDictPath)
	if err != nil {
		t.Fatalf("unexpected error, %v", err)
	}
	tnz, err := New(d)
	if err != nil {
		t.Fatalf("unexpected error, %v", err)
	}

	tokens1 := tnz.Tokenize("公園に行くトトロ") // BOS/公園/に/行く/トトロ/EOS
	if want, got := 6, len(tokens1); want != got {
		t.Fatalf("token length: want %d, got %d", want, got)
	}
	tokens2 := tnz.Tokenize("学校に行くトトロ") // BOS/学校/に/行く/トトロ/EOS
	if want, got := 6, len(tokens2); want != got {
		t.Fatalf("token length: want %d, got %d", want, got)
	}

	testdata := []struct {
		name     string
		lhs, rhs Token
		want     bool
	}{
		{
			name: "BOS vs BOS",
			lhs:  tokens1[0],
			rhs:  tokens2[0],
			want: true,
		},
		{
			name: "公園 vs 学校",
			lhs:  tokens1[1],
			rhs:  tokens2[1],
			want: true,
		},
		{
			name: "に vs に",
			lhs:  tokens1[2],
			rhs:  tokens2[2],
			want: true,
		},
		{
			name: "行く vs 行く",
			lhs:  tokens1[3],
			rhs:  tokens2[3],
			want: true,
		},
		{
			name: "トトロ vs トトロ",
			lhs:  tokens1[4],
			rhs:  tokens2[4],
			want: true,
		},
		{
			name: "EOS vs EOS",
			lhs:  tokens1[5],
			rhs:  tokens2[5],
			want: true,
		},
		{
			name: "BOS vs EOS",
			lhs:  tokens1[0],
			rhs:  tokens2[5],
			want: true,
		},
		{
			// 学校 is tokens2[1]; tokens1[1] is 公園, which did not match the
			// case name.
			name: "学校 vs トトロ",
			lhs:  tokens2[1],
			rhs:  tokens2[4],
			want: true,
		},
		{
			// Same correction as above: use the 学校 token the name refers to.
			name: "学校 vs 行く",
			lhs:  tokens2[1],
			rhs:  tokens2[3],
			want: false,
		},
	}
	for _, tt := range testdata {
		t.Run(tt.name, func(t *testing.T) {
			if got := tt.lhs.EqualPOS(tt.rhs); tt.want != got {
				t.Errorf("want %t, got %t, %q%+v vs %q%+v", tt.want, got, tt.lhs.Surface, tt.lhs.POS(), tt.rhs.Surface, tt.rhs.POS())
			}
		})
	}
}