custom head: test PixelToVoxelShuffleHead; derive FCMAE decoder channels and num_blocks from stem_kernel_size

ziw-liu · ziw-liu · commit 01c71cf18148 · 2024-03-02T11:04:20.000-08:00
diff --git a/tests/unet/test_fcmae.py b/tests/unet/test_fcmae.py
@@ -2,6 +2,7 @@
 
 from viscy.unet.networks.fcmae import (
     FullyConvolutionalMAE,
+    PixelToVoxelShuffleHead,
     MaskedAdaptiveProjection,
     MaskedConvNeXtV2Block,
     MaskedConvNeXtV2Stage,
@@ -104,6 +105,13 @@ def test_masked_multiscale_encoder():
         assert afeat.shape[2] == afeat.shape[3] == xy_size // stride
 
 
+def test_pixel_to_voxel_shuffle_head():
+    head = PixelToVoxelShuffleHead(240, 3, out_stack_depth=5, xy_scaling=4)
+    x = torch.rand(2, 240, 16, 16)
+    y = head(x)
+    assert y.shape == (2, 3, 5, 64, 64)
+
+
 def test_fcmae():
     x = torch.rand(2, 3, 5, 128, 128)
     model = FullyConvolutionalMAE(3, 3)
diff --git a/viscy/unet/networks/fcmae.py b/viscy/unet/networks/fcmae.py
@@ -5,6 +5,7 @@
 and timm's dense implementation of the encoder in ``timm.models.convnext``
 """
 
+import math
 from typing import Sequence
 
 import torch
@@ -416,7 +417,7 @@ def __init__(
         )
         decoder_channels = list(dims)
         decoder_channels.reverse()
-        decoder_channels[-1] = (in_stack_depth + 2) * in_channels * 2**2
+        decoder_channels[-1] = out_channels * in_stack_depth * stem_kernel_size[-1] ** 2
         self.decoder = Unet2dDecoder(
             decoder_channels,
             norm_name="instance",
@@ -433,7 +434,8 @@ def __init__(
             pool=True,
         )
         self.out_stack_depth = in_stack_depth
-        self.num_blocks = 6
+        # TODO: replace num_blocks with explicit strides for all models
+        self.num_blocks = len(dims) * int(math.log2(stem_kernel_size[-1]))
         self.pretraining = pretraining
 
     def forward(self, x: Tensor, mask_ratio: float = 0.0) -> Tensor: