@@ -14,24 +14,43 @@ object KafkaStage:
   private val logger = LoggerFactory.getLogger(classOf[KafkaStage.type])
 
   extension [K, V](source: Source[ProducerRecord[K, V]])
+    /** Publish the messages using a producer created with the given `settings`.
+      *
+      * @return
+      *   A stream of published records metadata, in the order in which the [[ProducerRecord]]s are received.
+      */
     def mapPublish(settings: ProducerSettings[K, V])(using StageCapacity, Ox): Source[RecordMetadata] =
       mapPublish(settings.toProducer, closeWhenComplete = true)
 
+    /** Publish the messages using the given `producer`. The producer is closed depending on the `closeWhenComplete` flag, after all
+      * messages are published, or when an exception occurs.
+      *
+      * @return
+      *   A stream of published records metadata, in the order in which the [[ProducerRecord]]s are received.
+      */
     def mapPublish(producer: KafkaProducer[K, V], closeWhenComplete: Boolean)(using StageCapacity, Ox): Source[RecordMetadata] =
       source.mapAsView(r => SendPacket(List(r), Nil)).mapPublishAndCommit(producer, closeWhenComplete, commitOffsets = false)
 
   extension [K, V](source: Source[SendPacket[K, V]])
-    /** For each packet, first all messages (producer records) are sent. Then, all messages up to the offsets of the consumer messages are
-      * committed. The metadata of the published records is sent downstream.
+    /** For each packet, first all messages (producer records) from [[SendPacket.send]] are sent, using a producer created with the given
+      * `producerSettings`. Then, all messages from [[SendPacket.commit]] are committed: for each topic-partition, up to the highest
+      * observed offset.
+      *
+      * @return
+      *   A stream of published records metadata, in the order in which the [[SendPacket]]s are received.
       */
     def mapPublishAndCommit(producerSettings: ProducerSettings[K, V])(using StageCapacity, Ox): Source[RecordMetadata] =
       mapPublishAndCommit(producerSettings.toProducer, closeWhenComplete = true)
 
-    /** For each packet, first all messages (producer records) are sent. Then, all messages up to the offsets of the consumer messages are
-      * committed. The metadata of the published records is sent downstream.
+    /** For each packet, first all messages (producer records) are sent, using the given `producer`. Then, all messages from
+      * [[SendPacket.commit]] are committed: for each topic-partition, up to the highest observed offset.
+      *
+      * The producer is closed depending on the `closeWhenComplete` flag, after all messages are published, or when an exception occurs.
       *
       * @param producer
      *   The producer that is used to send messages.
+      * @return
+      *   A stream of published records metadata, in the order in which the [[SendPacket]]s are received.
       */
     def mapPublishAndCommit(producer: KafkaProducer[K, V], closeWhenComplete: Boolean)(using StageCapacity, Ox): Source[RecordMetadata] =
       mapPublishAndCommit(producer, closeWhenComplete, commitOffsets = true)
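For orientation, here's a sketch of how these extension methods could be used. The consumer side (`KafkaSource.subscribe`, `ConsumerSettings`) and the single-record `SendPacket` constructor are assumptions about the surrounding module, not shown in this diff:

```scala
import org.apache.kafka.clients.producer.ProducerRecord
import ox.*
import ox.channels.*
import ox.kafka.*
import ox.kafka.KafkaStage.*

scoped {
  // hypothetical consumer/producer setup - adjust to the actual ConsumerSettings/ProducerSettings API;
  // a default StageCapacity given is assumed to be in scope
  val consumerSettings = ConsumerSettings.default("my-group").bootstrapServers("localhost:9092")
  val producerSettings = ProducerSettings.default.bootstrapServers("localhost:9092")

  KafkaSource
    .subscribe(consumerSettings, "input")
    // pair each outgoing record with the consumed message whose offset should be committed
    .map(in => SendPacket(new ProducerRecord[String, String]("output", in.value), in))
    // publish, then commit the consumed offsets; metadata arrives in receive order
    .mapPublishAndCommit(producerSettings)
    .foreach(m => println(s"published at offset ${m.offset()}"))
}
```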
@@ -40,29 +59,45 @@ object KafkaStage:
       StageCapacity,
       Ox
   ): Source[RecordMetadata] =
+    // source - the upstream from which packets are received
+
+    // the result, where metadata of published records is sent in the same order as the received packets
     val c = StageCapacity.newChannel[RecordMetadata]
+    // a helper channel to signal any exceptions that occur while publishing or committing offsets
     val exceptions = Channel.unlimited[Exception]
+    // possible out-of-order metadata of the records published from `packet.send`
     val metadata = Channel[(Long, RecordMetadata)](128)
+    // packets which are fully sent, and should be committed
     val toCommit = Channel[SendPacket[_, _]](128)
-
+    // used to reorder values received from `metadata` using the assigned sequence numbers
     val sendInSequence = SendInSequence(c)
 
     fork {
       try
-        // starting a nested scope, so that the committer is interrupted when the main process ends
+        // starting a nested scope, so that the committer is interrupted when the main process ends (when there's an exception)
         scoped {
           // committer
-          if commitOffsets then fork(tapException(doCommit(toCommit))(c.error))
+          val commitDoneSource = if commitOffsets then Source.fromFork(fork(tapException(doCommit(toCommit))(c.error))) else Source.empty
 
           repeatWhile {
             select(exceptions.receiveClause, metadata.receiveClause, source.receiveClause) match
-              case ChannelClosed.Error(r) => c.error(r); false
-              case ChannelClosed.Done     => sendInSequence.drainFromThenDone(exceptions, metadata); false
+              case ChannelClosed.Error(r) => c.error(r); false
+              case ChannelClosed.Done =>
+                // waiting until all records are sent and metadata forwarded to `c`
+                sendInSequence.drainFrom(metadata, exceptions)
+                // we now know that there won't be any more offsets sent to be committed - we can complete the channel
+                toCommit.done()
+                // waiting until the commit fork is done
+                commitDoneSource.receive()
+                // completing the downstream
+                c.done()
+                // and finally winding down this scope & fork
+                false
              case exceptions.Received(e) => c.error(e); false
              case metadata.Received((s, m)) => sendInSequence.send(s, m); true
              case source.Received(packet) =>
                try
-                  sendPacket(producer, packet, sendInSequence, toCommit, exceptions, metadata)
+                  sendPacket(producer, packet, sendInSequence, toCommit, exceptions, metadata, commitOffsets)
                  true
                catch
                  case e: Exception =>
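The orderly-shutdown logic in this hunk hinges on one pattern: a fork's completion is observed as a channel receive. A minimal standalone sketch of that pattern, using `scoped`, `fork` and `Source.fromFork` as in the hunk above (the worker body is a placeholder, not the library's `doCommit`):

```scala
import ox.*
import ox.channels.*

scoped {
  val work = Channel[Int](16)

  // Source.fromFork completes the source when the fork finishes,
  // so receive() doubles as a "join" on the background worker
  val done: Source[Unit] = Source.fromFork(fork {
    work.foreach(i => println(s"committing $i")) // placeholder for the real commit logic
  })

  work.send(1)
  work.send(2)
  work.done()    // no more work - the fork's foreach can finish
  done.receive() // blocks until the fork completes, then downstream can be closed
}
```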
@@ -84,7 +119,8 @@ object KafkaStage:
       sendInSequence: SendInSequence[RecordMetadata],
       toCommit: Sink[SendPacket[_, _]],
       exceptions: Sink[Exception],
-      metadata: Sink[(Long, RecordMetadata)]
+      metadata: Sink[(Long, RecordMetadata)],
+      commitOffsets: Boolean
   ): Unit =
     val leftToSend = new AtomicInteger(packet.send.size)
     packet.send.foreach { toSend =>
@@ -94,47 +130,48 @@ object KafkaStage:
         (m: RecordMetadata, e: Exception) =>
           if e != null then exceptions.send(e)
           else {
+            // sending commit request first, as when upstream `source` is done, we need to know that all commits are
+            // scheduled in order to shut down properly
+            if commitOffsets && leftToSend.decrementAndGet() == 0 then toCommit.send(packet)
             metadata.send((sequenceNo, m))
-            if leftToSend.decrementAndGet() == 0 then toCommit.send(packet)
           }
       )
     }
 
-  /** Sends `T` elements to the given `c` sink, when elements with subsequence sequence numbers are available. */
-  private class SendInSequence[T](c: Sink[T]):
-    private var sequenceNoNext = 0L
-    private var sequenceNoToSendNext = 0L
-    private val toSend = mutable.SortedSet[(Long, T)]()(Ordering.by(_._1))
-
-    def nextSequenceNo: Long =
-      val n = sequenceNoNext
-      sequenceNoNext += 1
-      n
-
-    def send(sequenceNo: Long, v: T): Unit =
-      toSend.add((sequenceNo, v))
+  /** Sends `T` elements to the given `c` sink, when elements with subsequent sequence numbers are available. Thread-unsafe. */
+  private class SendInSequence[T](c: Sink[T]):
+    private var sequenceNoNext = 0L
+    private var sequenceNoToSendNext = 0L
+    private val toSend = mutable.SortedSet[(Long, T)]()(Ordering.by(_._1))
+
+    def nextSequenceNo: Long =
+      val n = sequenceNoNext
+      sequenceNoNext += 1
+      n
+
+    def send(sequenceNo: Long, v: T): Unit =
+      toSend.add((sequenceNo, v))
+      trySend()
+
+    def allSent: Boolean = sequenceNoNext == sequenceNoToSendNext
+
+    @tailrec
+    private def trySend(): Unit = toSend.headOption match
+      case Some((s, m)) if s == sequenceNoToSendNext =>
+        toSend.remove((s, m))
+        c.send(m)
+        sequenceNoToSendNext += 1
         trySend()
+      case _ => ()
 
-    def allSent: Boolean = sequenceNoNext == sequenceNoToSendNext
-
-    @tailrec
-    private def trySend(): Unit = toSend.headOption match
-      case Some((s, m)) if s == sequenceNoToSendNext =>
-        toSend.remove((s, m))
-        c.send(m)
-        sequenceNoToSendNext += 1
-        trySend()
-      case _ => ()
-
-    @tailrec
-    final def drainFromThenDone(
-        exceptions: Source[Exception],
-        incoming: Source[(Long, T)]
-    ): Unit =
-      if allSent then c.done()
-      else
-        select(exceptions.receiveClause, incoming.receiveClause) match
-          case ChannelClosed.Error(r)    => c.error(r)
-          case ChannelClosed.Done        => throw new IllegalStateException()
-          case exceptions.Received(e)    => c.error(e)
-          case incoming.Received((s, m)) => send(s, m); drainFromThenDone(exceptions, incoming)
+    @tailrec
+    final def drainFrom(
+        incoming: Source[(Long, T)],
+        exceptions: Source[Exception]
+    ): Unit =
+      if !allSent then
+        select(exceptions.receiveClause, incoming.receiveClause) match
+          case ChannelClosed.Error(r)    => c.error(r)
+          case ChannelClosed.Done        => throw new IllegalStateException()
+          case exceptions.Received(e)    => c.error(e)
+          case incoming.Received((s, m)) => send(s, m); drainFrom(incoming, exceptions)
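To make the rewritten `SendInSequence` concrete, here's an illustrative snippet (the class is private, so this is exposition only): out-of-order completions are buffered in the sorted set and flushed to the sink as soon as the sequence is gap-free.

```scala
val results = Channel.unlimited[String]
val seq = SendInSequence(results)

// sequence numbers are handed out in send order: 0, 1, 2
val (n0, n1, n2) = (seq.nextSequenceNo, seq.nextSequenceNo, seq.nextSequenceNo)

seq.send(n2, "c") // buffered: 0 and 1 are still outstanding
seq.send(n0, "a") // "a" is forwarded to `results` immediately
seq.send(n1, "b") // "b" is forwarded, then the buffered "c" is flushed
// results now contains "a", "b", "c"; seq.allSent == true
```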