Skip to content

Commit

Permalink
Preserve br tag in ttml (#106)
Browse files Browse the repository at this point in the history
* Preserve br tag in ttml

* [ttml] Change to more appropriate name and optimize ttml xml decoder
  • Loading branch information
NhanNguyen700 authored Jun 17, 2024
1 parent 2f42372 commit 35e8bad
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 2 deletions.
24 changes: 24 additions & 0 deletions testdata/example-in-breaklines.ttml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<tt>
<head>
</head>
<body>
<div>
<p xml:id="1" begin="00:00:00.000" end="00:00:01.000">
<span>First line<br/>
Second line</span>
</p>
<p xml:id="2" begin="00:00:01.000" end="00:00:02.000">
<span>Third line<br></br>Fourth line</span>
</p>
<p xml:id="3" begin="00:00:02.000" end="00:00:03.000">
Fifth line
<br/>
Sixth <span>middle</span> line
</p>
<p xml:id="4" begin="00:00:03.000" end="00:00:04.000">
Seventh line
<br></br>Eighth <span>middle</span> line
</p>
</div>
</body>
</tt>
32 changes: 32 additions & 0 deletions testdata/example-out-breaklines.ttml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata" xmlns:tts="http://www.w3.org/ns/ttml#styling">
<head>
<styling></styling>
<layout></layout>
</head>
<body>
<div>
<p begin="00:00:00.000" end="00:00:01.000">
<span>First line</span>
<br></br>
<span>Second line</span>
</p>
<p begin="00:00:01.000" end="00:00:02.000">
<span>Third lineFourth line</span>
</p>
<p begin="00:00:02.000" end="00:00:03.000">
<span>Fifth line</span>
<br></br>
<span>Sixth </span>
<span>middle </span>
<span>line</span>
</p>
<p begin="00:00:03.000" end="00:00:04.000">
<span>Seventh line</span>
<br></br>
<span>Eighth </span>
<span>middle </span>
<span>line</span>
</p>
</div>
</body>
</tt>
34 changes: 33 additions & 1 deletion ttml.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,38 @@ func (i *TTMLInItems) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err
return nil
}

// ttmlXmlDecoder embeds an xml.Decoder and overrides Token so that every "br"
// start element is preceded by a synthetic newline character-data token.
type ttmlXmlDecoder struct {
	xml.Decoder
	// holdingToken is the "br" start element withheld by the previous call to
	// Token; it is returned, and cleared, by the next call.
	holdingToken xml.Token
}

// Compile-time check that the wrapper satisfies xml.TokenReader.
var _ xml.TokenReader = (*ttmlXmlDecoder)(nil)

// Token implements the xml.TokenReader interface. When the underlying decoder
// yields a "br" start element (matched case-insensitively), Token withholds
// that element and returns a newline as character data instead; the withheld
// element is returned by the following call. All other tokens, and any error
// from the underlying decoder, are passed through unchanged.
//
// This is a workaround for the Go XML unmarshaler ignoring a "br" tag that
// sits inside a character data field.
//
// NOTE(review): methods promoted from the embedded xml.Decoder (such as
// Decode) run against the embedded value and do not dispatch to this
// override. Code that needs the newline substitution has to call Token on the
// wrapper directly or feed the wrapper to xml.NewTokenDecoder — confirm that
// the call sites are wired the way they are meant to be.
func (r *ttmlXmlDecoder) Token() (xml.Token, error) {
	// Hand out the element withheld by the previous call first.
	if held := r.holdingToken; held != nil {
		r.holdingToken = nil
		return held, nil
	}

	t, err := r.Decoder.Token()
	if err != nil {
		return nil, err
	}

	// EqualFold compares case-insensitively without allocating a lowered copy
	// of the element name.
	if se, ok := t.(xml.StartElement); ok && strings.EqualFold(se.Name.Local, "br") {
		r.holdingToken = t
		return xml.CharData("\n"), nil
	}

	return t, nil
}

// newTTMLXmlDecoder builds a ttmlXmlDecoder that reads the raw items of ts
// wrapped in a synthetic <p> root element. No token is held initially.
func newTTMLXmlDecoder(ts TTMLInSubtitle) *ttmlXmlDecoder {
	// Wrap the items so the decoder always sees a single root element.
	wrapped := "<p>" + ts.Items + "</p>"

	dec := &ttmlXmlDecoder{}
	dec.Decoder = *xml.NewDecoder(strings.NewReader(wrapped))
	return dec
}

// TTMLInItem represents an input TTML item
type TTMLInItem struct {
Style string `xml:"style,attr,omitempty"`
Expand Down Expand Up @@ -380,7 +412,7 @@ func ReadFromTTML(i io.Reader) (o *Subtitles, err error) {

// Unmarshal items
var items = TTMLInItems{}
if err = xml.Unmarshal([]byte("<span>"+ts.Items+"</span>"), &items); err != nil {
if err = newTTMLXmlDecoder(ts).Decode(&items); err != nil {
err = fmt.Errorf("astisub: unmarshaling items failed: %w", err)
return
}
Expand Down
20 changes: 19 additions & 1 deletion ttml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package astisub_test

import (
"bytes"
"github.com/asticode/go-astikit"
"io/ioutil"
"strings"
"testing"

"github.com/asticode/go-astikit"

"github.com/asticode/go-astisub"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -50,3 +52,19 @@ func TestTTML(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, string(c), w.String())
}

// TestTTMLBreakLines opens the break-line TTML fixture, writes it back out as
// TTML and compares the result with the expected fixture, ignoring leading and
// trailing whitespace on both sides.
func TestTTMLBreakLines(t *testing.T) {
	// Parse the input fixture
	subs, err := astisub.OpenFile("./testdata/example-in-breaklines.ttml")
	assert.NoError(t, err)

	// Serialize it back to TTML
	var out bytes.Buffer
	err = subs.WriteToTTML(&out)
	assert.NoError(t, err)

	// Load the expected output
	expected, err := ioutil.ReadFile("./testdata/example-out-breaklines.ttml")
	assert.NoError(t, err)

	want := strings.TrimSpace(string(expected))
	got := strings.TrimSpace(out.String())
	assert.Equal(t, want, got)
}

0 comments on commit 35e8bad

Please sign in to comment.