forked from w3c/mediacapture-depth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
721 lines (714 loc) · 28.1 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
<!DOCTYPE html>
<html>
<head>
<title>
Media Capture Depth Stream Extensions
</title>
<meta charset='utf-8'>
<script src='//www.w3.org/Tools/respec/respec-w3c-common' async class=
"remove">
</script>
<script class='remove'>
var respecConfig = {
specStatus: "ED",
shortName: "mediacapture-depth",
// publishDate: "2009-08-06",
// copyrightStart: "2005"
previousPublishDate: "2015-12-08",
previousMaturity: "WD",
edDraftURI: "https://w3c.github.io/mediacapture-depth/",
// lcEnd: "2009-08-05",
noLegacyStyle: true,
editors: [
{
w3cid: "41974",
name: "Anssi Kostiainen",
company: "Intel",
companyURL: "http://www.intel.com/"
},
{
w3cid: "68202",
name: "Ningxin Hu",
company: "Intel",
companyURL: "http://www.intel.com/"
},
{
w3cid: "76096",
name: "Rob Manson",
company: "Invited Expert"
}
],
wg: [
"Device APIs Working Group",
"Web Real-Time Communications Working Group"
],
wgURI: [
"http://www.w3.org/2009/dap/",
"http://www.w3.org/2011/04/webrtc/"
],
wgPublicList: "public-media-capture",
wgPatentURI: [
"http://www.w3.org/2004/01/pp-impl/47318/status",
"http://www.w3.org/2004/01/pp-impl/43696/status"
],
otherLinks: [{
key: "Participate",
data: [
{
value: "Mailing list",
href: "http://lists.w3.org/Archives/Public/public-media-capture/"
},
{
value: "GitHub repository",
href: "https://github.com/w3c/mediacapture-depth/"
},
{
value: "Open issues",
href: "https://github.com/w3c/mediacapture-depth/issues"
},
{
value: "Commit history",
href: "https://github.com/w3c/mediacapture-depth/commits/"
}
]
}],
localBiblio: {
"MEDIACAPTURE-WORKER": {
title: "Media Capture Stream with Worker",
href: "https://w3c.github.io/mediacapture-worker/",
authors: [
"Chia-hung Tai",
"Robert O'Callahan",
"Tzuhao Kuo",
"Anssi Kostiainen"
],
status: "ED",
publisher: "W3C"
}
}
};
</script>
</head>
<body>
<section id='abstract'>
<p>
This specification extends the <em>Media Capture and Streams</em>
specification [[!GETUSERMEDIA]] to allow a <a>depth-only stream</a> or
combined <a>depth+video stream</a> to be requested from the web
platform using APIs familiar to web authors.
</p>
</section>
<section id='sotd'>
<p>
The following substantial changes were made since the <a href=
"http://www.w3.org/TR/2015/WD-mediacapture-depth-20150129/">W3C Working
Draft 29 January 2015</a>:
</p>
<ul>
<li>Clarified <a href="#terminology">Terminology</a> and <a href=
"#dependencies">Dependencies</a>
</li>
<li>Added <a href=
"#mediastreamconstraints-dictionary"><code>MediaTrackConstraints</code></a>
dictionary
</li>
<li>Defined <a href="#media-provider-object">Media provider object</a>
behavior for a <a>depth-only stream</a>
</li>
<li>Removed <code>CanvasImageSource</code> typedef extensions
</li>
<li>Removed <code>ImageData</code> interface extensions
</li>
<li>Defined <a href="#the-video-element">the video element</a> behavior
for a <a>depth-only stream</a> and <a>depth+video stream</a>
</li>
<li>Defined the algorithm to <a>convert the depth map value to
grayscale</a>
</li>
<li>Added <a href=
"#mediatracksettings-dictionary"><code>MediaTrackSettings</code></a>
dictionary and removed the <code>Settings</code> dictionary
</li>
<li>Updated <a href="#examples">Examples</a>
</li>
</ul>
<p>
<strong>This document is not complete and is subject to change. Early
experimentations are encouraged to allow the Media Capture Task Force
to evolve the specification based on technical discussions within the
Task Force, implementation experience gained from early
implementations, and feedback from other groups and
individuals.</strong>
</p>
</section>
<section>
<h2>
Introduction
</h2>
<p>
Depth cameras are increasingly being integrated into devices such as
phones, tablets, and laptops. Depth cameras provide a <a>depth map</a>,
which conveys the distance information between points on an object's
surface and the camera. With depth information, web content and
applications can be enhanced by, for example, the use of hand gestures
as an input mechanism, or by creating 3D models of real-world objects
that can interact and integrate with the web platform. Concrete
applications of this technology include more immersive gaming
experiences, more accessible 3D video conferences, and augmented
reality, to name a few.
</p>
<p>
To bring depth capability to the web platform, this specification
extends the <code><a>MediaStream</a></code> interface [[!GETUSERMEDIA]]
to enable it to also contain depth-based
<a><code>MediaStreamTrack</code></a>s. A depth-based
<a><code>MediaStreamTrack</code></a>, referred to as a <a>depth stream
track</a>, represents an abstraction of a stream of frames that can
each be converted to objects which contain an array of pixel data,
where each pixel represents the distance between the camera and the
objects in the scene for that point in the array. A
<code><a>MediaStream</a></code> object that contains one or more
<a>depth stream track</a>s is referred to as a <a>depth-only stream</a>
or <a>depth+video stream</a>.
</p>
<p>
Depth cameras usually produce 16-bit depth values per pixel. However,
neither the canvas drawing surface used to draw and manipulate 2D
graphics on the web platform nor the <code><a>ImageData</a></code>
interface used to represent image data support 16 bits per pixel. To
address the issue, this specification defines a conversion into a 8-bit
grayscale representation of a <a>depth map</a> for consumption by APIs
that are limited to 8 bits per pixel.
</p>
<p>
The Media Capture Stream with Worker specification
[[MEDIACAPTURE-WORKER]] that complements this specification enables
processing of 16-bit depth values per pixel directly in a worker
environment and makes the <code><video></code> and
<code><canvas></code> indirection and depth-to-grayscale
conversion redundant. This alternative pipeline that supports greater
bit depth and does not incur the performance penalty of the indirection
and conversion enables more advanced use cases.
</p>
</section>
<section>
<h2>
Use cases and requirements
</h2>
<p>
This specification attempts to address the <a href=
"https://www.w3.org/wiki/Media_Capture_Depth_Stream_Extension">Use
Cases and Requirements</a> for accessing depth stream from a depth
camera. See also the <a href=
"https://www.w3.org/wiki/Media_Capture_Depth_Stream_Extension#Examples">
Examples</a> section for concrete usage examples.
</p>
</section>
<section id="conformance">
<p>
This specification defines conformance criteria that apply to a single
product: the <dfn>user agent</dfn> that implements the interfaces that
it contains.
</p>
<p>
Implementations that use ECMAScript to implement the APIs defined in
this specification must implement them in a manner consistent with the
ECMAScript Bindings defined in the Web IDL specification [[!WEBIDL]],
as this specification uses that specification and terminology.
</p>
</section>
<section>
<h2>
Dependencies
</h2>
<p>
The <code><a href=
"http://w3c.github.io/mediacapture-main/getusermedia.html#idl-def-MediaStreamTrack">
<dfn>MediaStreamTrack</dfn></a></code> and <code><a href=
"http://w3c.github.io/mediacapture-main/getusermedia.html#idl-def-MediaStream">
<dfn>MediaStream</dfn></a></code> interfaces this specification extends
are defined in [[!GETUSERMEDIA]].
</p>
<p>
The <code><a href=
"http://w3c.github.io/mediacapture-main/getusermedia.html#idl-def-Constraints">
<dfn>Constraints</dfn></a></code>, <code><a href=
"http://w3c.github.io/mediacapture-main/getusermedia.html#idl-def-MediaStreamConstraints">
<dfn>MediaStreamConstraints</dfn></a></code>, <code><a href=
"http://w3c.github.io/mediacapture-main/getusermedia.html#idl-def-MediaTrackSettings">
<dfn>MediaTrackSettings</dfn></a></code>, and <code><a href=
"http://w3c.github.io/mediacapture-main/getusermedia.html#idl-def-MediaTrackConstraints">
<dfn>MediaTrackConstraints</dfn></a></code> dictionaries this
specification extends are based upon the <a href=
"http://w3c.github.io/mediacapture-main/getusermedia.html#constrainable-interface">
<dfn>Constrainable pattern</dfn></a> defined in [[!GETUSERMEDIA]].
</p>
<p>
The <code><a href=
"http://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia">
<dfn>getUserMedia()</dfn></a></code> method and the <code><a href=
"http://w3c.github.io/mediacapture-main/#idl-def-NavigatorUserMediaSuccessCallback">
<dfn>NavigatorUserMediaSuccessCallback</dfn></a></code> callback are
defined in [[!GETUSERMEDIA]].
</p>
<p>
The <code><a href=
"https://html.spec.whatwg.org/#canvasrenderingcontext2d"><dfn>CanvasRenderingContext2D</dfn></a></code>
and <code><a href=
"https://html.spec.whatwg.org/#imagedata"><dfn>ImageData</dfn></a></code>
interfaces, <code><a href=
"https://html.spec.whatwg.org/#canvasimagesource"><dfn>CanvasImageSource</dfn></a></code>
typedef, and <code><a href=
"https://html.spec.whatwg.org/#videotrack"><dfn>VideoTrack</dfn></a></code>
interface are defined in [[!HTML]].
</p>
<p>
The <code><a href=
"https://people.mozilla.org/~jorendorff/es6-draft.html#sec-arraybuffer-objects">
<dfn>ArrayBuffer</dfn></a></code> and <code><a href=
"https://people.mozilla.org/~jorendorff/es6-draft.html#sec-uint16array">
<dfn>Uint16Array</dfn></a></code> types are defined in [[!ECMASCRIPT]].
</p>
</section>
<section>
<h2>
Terminology
</h2>
<p>
The term <dfn>depth+video stream</dfn> means a <a>MediaStream</a>
object that contains one or more <a>MediaStreamTrack</a> objects of
kind "<code>depth</code>" (<a>depth stream track</a>) and one or more
<a>MediaStreamTrack</a> objects of kind "<code>video</code>" (<a>video
stream track</a>).
</p>
<p>
The term <dfn>depth-only stream</dfn> means a <a>MediaStream</a> object
that contains one or more <a>MediaStreamTrack</a> objects of kind
"<code>depth</code>" (<a>depth stream track</a>) only.
</p>
<p>
The term <dfn>video-only stream</dfn> means a <a>MediaStream</a> object
that contains one or more <a>MediaStreamTrack</a> objects of kind
"<code>video</code>" (<a>video stream track</a>) only, and optionally
of kind "<code>audio</code>".
</p>
<p>
The term <dfn>depth stream track</dfn> means a <a>MediaStreamTrack</a>
object whose kind is "<code>depth</code>". It represents a media stream
track whose <a href=
"http://w3c.github.io/mediacapture-main/#dfn-source">source</a> is a
depth camera.
</p>
<p>
The term <dfn>video stream track</dfn> means a <a>MediaStreamTrack</a>
object whose kind is "<code>video</code>". It represents a media stream
track whose <a href=
"http://w3c.github.io/mediacapture-main/#dfn-source">source</a> is a
video camera.
</p>
<section>
<h2>
Depth map
</h2>
<p>
A <dfn>depth map</dfn> is an abstract representation of a frame of a
<a>depth stream track</a>. A <a>depth map</a> is an image that
contains information relating to the distance of the surfaces of
scene objects from a viewpoint.
</p>
<p>
A <a>depth map</a> has an associated <dfn>focal length</dfn> which is
a double. It represents the focal length of the camera in
millimeters.
</p>
<p>
A <a>depth map</a> has an associated <dfn>horizontal field of
view</dfn> which is a double. It represents the horizontal angle of
view in degrees.
</p>
<p>
A <a>depth map</a> has an associated <dfn>vertical field of
view</dfn> which is a double. It represents the vertical angle of
view in degrees.
</p>
<p>
A <a>depth map</a> has an associated <dfn>unit</dfn> which is a
string. It represents the <a>active depth map unit</a>.
</p>
<p>
A <a>depth map</a> has an associated <dfn>near value</dfn> which is a
double. It represents the minimum range in <a>active depth map
unit</a>s.
</p>
<p>
A <a>depth map</a> has an associated <dfn>far value</dfn> which is a
double. It represents the maximum range in <a>active depth map
unit</a>s.
</p>
</section>
</section>
<section>
<h2>
Extensions
</h2>
<section>
<h2>
<code><a>MediaStreamConstraints</a></code> dictionary
</h2>
<pre class="idl">
partial dictionary MediaStreamConstraints {
(boolean or MediaTrackConstraints) depth = false;
};
</pre>
<p dfn-for="MediaStreamConstraints" link-for="MediaStreamConstraints">
If the <dfn><code>depth</code></dfn> dictionary member has the value
true, the <a>MediaStream</a> returned by the <a>getUserMedia()</a>
method MUST contain a <a>depth stream track</a>. If the <a>depth</a>
dictionary member is set to false, is not provided, or is set to
null, the <a>MediaStream</a> MUST NOT contain a <a>depth stream
track</a>.
</p>
</section>
<section>
<h2>
<code><a>MediaTrackConstraints</a></code> dictionary
</h2>
<pre class="idl">
enum DepthMapUnit {
"mm",
"m"
};
</pre>
<p>
The <code><a>DepthMapUnit</a></code> enumeration represents the
possible <dfn>depth map unit</dfn>s for a <a>depth map</a>. The
"<code>mm</code>" value indicates millimeters, the "<code>m</code>"
value indicates meters.
</p>
<pre class="idl">
partial dictionary MediaTrackConstraints {
DepthMapUnit unit = "mm";
};
</pre>
<p dfn-for="MediaTrackConstraints" link-for="MediaTrackConstraints">
If the <dfn><code>unit</code></dfn> dictionary member value is one of
the possible <a>depth map unit</a>s, it becomes the <dfn>active depth
map unit</dfn> for the <a>depth stream track</a>. Otherwise, the
<a>active depth map unit</a> is "<code>mm</code>".
</p>
</section>
<section>
<h2>
<code>MediaStream</code> interface
</h2>
<pre class="idl">
partial interface MediaStream {
sequence<MediaStreamTrack> getDepthTracks();
};
</pre>
<p dfn-for="MediaStream" link-for="MediaStream">
The <code><dfn>getDepthTracks</dfn>()</code> method, when invoked,
MUST return a sequence of <a data-lt="depth stream track">depth
stream tracks</a> in this stream.
</p>
<p>
The <dfn><code>getDepthTracks()</code></dfn> method MUST return a
sequence that represents a snapshot of all the
<a><code>MediaStreamTrack</code></a> objects in this stream's track
set whose <a><code>kind</code></a> is equal to "<code>depth</code>".
The conversion from the track set to the sequence is <a>user
agent</a> defined and the order does not have to be stable between
calls.
</p>
<section class='informative'>
<h3>
Implementation considerations
</h3>
<p>
A <a>video stream track</a> and a <a>depth stream track</a> can be
combined into one <a>depth+video stream</a>. The rendering of the
two tracks are intended to be synchronized. The resolution of the
two tracks are intended to be same. And the coordination of the two
tracks are intended to be calibrated. These are not hard
requirements, since it might not be possible to synchronize tracks
from sources.
</p>
</section>
</section>
<section>
<h2>
<code>MediaStreamTrack</code> interface
</h2>
<p>
The <code><dfn>kind</dfn></code> attribute MUST, on getting, return
the string "<code>depth</code>" if the object represents a <a>depth
stream track</a>.
</p>
</section>
<section>
<h2>
Media provider object
</h2>
<p>
A <a href="https://html.spec.whatwg.org/#media-provider-object">media
provider object</a> can represent a <a>depth-only stream</a> (and
specifically, not a <a>depth+video stream</a>). The <a>user agent</a>
MUST support a <a href=
"https://html.spec.whatwg.org/#media-element">media element</a> with
an <a href=
"https://html.spec.whatwg.org/#assigned-media-provider-object">assigned
media provider object</a> that is a <a>depth-only stream</a>, and in
particular, the <a href=
"https://html.spec.whatwg.org/#dom-media-srcobject"><code>srcObject</code></a>
IDL attribute that allows the <a href=
"https://html.spec.whatwg.org/#media-element">media element</a> to be
assigned a <a href=
"https://html.spec.whatwg.org/#media-provider-object">media provider
object</a> MUST, on setting and getting, behave as specified in
[[!HTML]].
</p>
</section>
<section>
<h2>
The <code>video</code> element
</h2>
<p>
For a <a href=
"https://html.spec.whatwg.org/#the-video-element"><code>video</code>
element</a> whose <a href=
"https://html.spec.whatwg.org/#assigned-media-provider-object">assigned
media provider object</a> is a <a>depth-only stream</a>, the <a>user
agent</a> MUST, for each pixel of the <a href=
"https://html.spec.whatwg.org/#media-data">media data</a> that is
represented by a <a>depth map</a>, <a>convert the depth map value to
grayscale</a> prior to when the <code>video</code> element is
<a href="https://html.spec.whatwg.org/#potentially-playing">potentially
playing</a>.
</p>
<p>
For a <a href=
"https://html.spec.whatwg.org/#the-video-element"><code>video</code>
element</a> whose <a href=
"https://html.spec.whatwg.org/#assigned-media-provider-object">assigned
media provider object</a> is a <a>depth+video stream</a>, the <a>user
agent</a> MUST act as if all the <a>MediaStreamTrack</a>s of kind
"<code>depth</code>" were removed prior to when the
<code>video</code> element is <a href=
"https://html.spec.whatwg.org/#potentially-playing">potentially
playing</a>.
</p>
<p>
The algorithm to <dfn>convert the depth map value to grayscale</dfn>,
given a depth map value <var>d</var>, is as follows:
</p>
<ol>
<li>Let <var>bit depth</var> be the bit depth of the depth map.
</li>
<li>Let <var>near</var> be the the <a>near value</a>.
</li>
<li>Let <var>far</var> be the the <a>far value</a>.
</li>
<li>If <var>bit depth</var> is greater than 8, then apply the
<a>rules to convert using range inverse</a> to <var>d</var> to obtain
quantized value <var>d<sub>8bit</sub>.</var>
</li>
<li>Otherwise, apply the <a>rules to convert using range linear</a>
to <var>d</var> to obtain quantized value
<var>d<sub>8bit</sub></var>.
</li>
<li>Return <var>d<sub>8bit</sub></var>.
</li>
</ol>
<p>
The <dfn>rules to convert using range inverse</dfn> are as given in
the following formula:
</p><img src="images/range-inverse.png" alt="Range inverse"><br>
<img src="images/quantization.png" alt="Quantization">
<p>
The <dfn>rules to convert using range linear</dfn> are as given in
the following formula:
</p><img src="images/range-linear.png" alt="Range linear"><br>
<img src="images/quantization.png" alt="Quantization">
<section>
<h2>
<code>VideoTrack</code> interface
</h2>
<p>
For each <a>depth stream track</a> in the <a>depth-only stream</a>,
the <a>user agent</a> MUST create a corresponding <a>VideoTrack</a>
as defined in [[HTML]].
</p>
</section>
</section>
<section>
<h2>
<code>MediaTrackSettings</code> dictionary
</h2>
<p>
When the <code>getSettings()</code> method is invoked on a <a>depth
stream track</a>, the <a>user agent</a> MUST return the following
dictionary that extends the <a><code>MediaTrackSettings</code></a>
dictionary:
</p>
<pre class="idl">
enum RangeFormat {
"inverse",
"linear"
};
partial dictionary MediaTrackSettings {
double focalLength;
RangeFormat format;
double horizontalFieldOfView;
double verticalFieldOfView;
DepthMapUnit? unit;
double near;
double far;
};
</pre>
<div dfn-for="MediaTrackSettings" link-for="MediaTrackSettings">
<p>
The <dfn><code>focalLength</code></dfn> dictionary member
represents the <a>depth map</a>'s <a>focal length</a>.
</p>
<p>
The <dfn><code>format</code></dfn> dictionary member represents the
depth to grayscale conversion method applied to the <a>depth
map</a>. If the value is "<code>inverse</code>", the <a>rules to
convert using range inverse</a> are applied, and if the value is
"<code>linear</code>", the <a>rules to convert using range
linear</a> are applied.
</p>
<p>
The <dfn><code>horizontalFieldOfView</code></dfn> dictionary member
represents the <a>depth map</a>'s <a>horizontal field of view</a>.
</p>
<p>
The <dfn><code>verticalFieldOfView</code></dfn> dictionary member
represents the <a>depth map</a>'s <a>vertical field of view</a>.
</p>
<p>
The <dfn><code>unit</code></dfn> dictionary member represents the
<a>active depth map unit</a>.
</p>
<p>
The <dfn><code>near</code></dfn> dictionary member represents the
<a>depth map</a>'s <a>near value</a>.
</p>
<p>
The <dfn><code>far</code></dfn> dictionary member represents the
<a>depth map</a>'s <a>far value</a>.
</p>
</div>
</section>
<section>
<h2>
<code>WebGLRenderingContext</code> interface
</h2>
<section class='informative'>
<h3>
Implementation considerations
</h3>
<p>
A <code>video</code> element whose source is a
<a><code>MediaStream</code></a> object containing a <a>depth stream
track</a> may be uploaded to a WebGL texture of format
<code>RGB</code> and type <code>UNSIGNED_BYTE</code>. [[WEBGL]]
</p>
<p>
For each pixel of this WebGL texture, the R component represents
the lower 8 bit value of 16 bit depth value, the G component
represents the upper 8 bit value of 16 bit depth value and the
value in B component is not defined.
</p>
</section>
</section>
</section>
<section class='informative'>
<h2>
Examples
</h2>
<h3>
Playback of depth+video stream
</h3>
<pre class='example highlight'>
navigator.mediaDevices.getUserMedia({
depth: true,
video: true
}).then(function (stream) {
// Wire the media stream into a <video> element for playback.
// The RGB video is rendered.
var video = document.querySelector('#video');
video.srcObject = stream;
video.play();
// Construct a depth-only stream out of the existing depth stream track.
var depthOnlyStream = new MediaStream(s.getDepthTracks()[0]);
// Wire the depth-only stream into another <video> element for playback.
// The depth information is rendered in its grayscale representation.
var depthVideo = document.querySelector('#depthVideo');
depthVideo.srcObject = depthOnlyStream;
depthVideo.play();
}
);
</pre>
<h3>
WebGL Fragment Shader based post-processing
</h3>
<pre class='example highlight'>
// This code sets up a video element from a depth stream, uploads it to a WebGL
// texture, and samples that texture in the fragment shader, reconstructing the
// 16-bit depth values from the red and green channels.
navigator.mediaDevices.getUserMedia({
depth: true,
}).then(function (stream) {
// wire the stream into a <video> element for playback
var depthVideo = document.querySelector('#depthVideo');
depthVideo.srcObject = stream;
depthVideo.play();
}).catch(function (reason) {
// handle gUM error here
});
// ... later, in the rendering loop ...
gl.texImage2D(
gl.TEXTURE_2D,
0,
gl.RGB,
gl.RGB,
gl.UNSIGNED_BYTE,
depthVideo
);
<script id="fragment-shader" type="x-shader/x-fragment">
varying vec2 v_texCoord;
// u_tex points to the texture unit containing the depth texture.
uniform sampler2D u_tex;
uniform float far;
uniform float near;
uniform bool isRangeInverse;
void main() {
vec4 floatColor = texture2D(u_tex, v_texCoord);
float dn = floatColor.r;
float depth = 0.;
if (isRangeInverse) {
depth = far * near / ( far - dn * ( far - near));
} else {
// Otherwise, using range linear
depth = dn * ( far - near ) + near;
}
// ...
}
</script>
</pre>
</section>
<section class='appendix'>
<h2>
Acknowledgements
</h2>
<p>
Thanks to everyone who contributed to the <a href=
"https://www.w3.org/wiki/Media_Capture_Depth_Stream_Extension">Use
Cases and Requirements</a>, sent feedback and comments. Special thanks
to Ningxin Hu for experimental implementations, as well as to the
Project Tango for their experiments.
</p>
</section>
</body>
</html>