From 9eff1f948a7982498d096d93dfe02113ec735b79 Mon Sep 17 00:00:00 2001 From: HubHop Date: Thu, 23 Dec 2021 15:03:22 +1100 Subject: [PATCH] release pretrained weights --- README.md | 42 +++++++++++++++++++++++++----------------- config/hvt-s-2.json | 15 +++++++++++++++ config/hvt-s-3.json | 15 +++++++++++++++ 3 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 config/hvt-s-2.json create mode 100644 config/hvt-s-3.json diff --git a/README.md b/README.md index 9d3d561..591053c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # Scalable Vision Transformers with Hierarchical Pooling +[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + + This is the official PyTorch implementation of ICCV 2021 paper: **Scalable Vision Transformers with Hierarchical Pooling**. By [Zizheng Pan](https://scholar.google.com.au/citations?user=w_VMopoAAAAJ&hl=en), [Bohan Zhuang](https://sites.google.com/view/bohanzhuang), [Jing Liu](https://sites.google.com/view/jing-liu/首页), [Haoyu He](https://scholar.google.com/citations?user=aU1zMhUAAAAJ&hl=en), and [Jianfei Cai](https://scholar.google.com/citations?user=N6czCoUAAAAJ&hl=en). 
@@ -13,11 +16,13 @@ In our [paper](https://arxiv.org/abs/2103.10619), we propose a Hierarchical Visu If you use this code for a paper please cite: ``` -@article{pan2021scalable, - title={Scalable vision transformers with hierarchical pooling}, - author={Pan, Zizheng and Zhuang, Bohan and Liu, Jing and He, Haoyu and Cai, Jianfei}, - journal={arXiv preprint arXiv:2103.10619}, - year={2021} +@InProceedings{Pan_2021_ICCV, + author = {Pan, Zizheng and Zhuang, Bohan and Liu, Jing and He, Haoyu and Cai, Jianfei}, + title = {Scalable Vision Transformers With Hierarchical Pooling}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2021}, + pages = {377-386} } ``` @@ -107,21 +112,24 @@ You can scale a HVT model with various settings, which is supported in the confi ### Main Results -| Name | FLOPs (G) | Params (M) | Top-1 Acc. (%) | Top-5 Acc. (%) | -| -------------- | --------- | ---------- | -------------- | -------------- | -| HVT-Ti-1 | 0.64 | 5.74 | 69.64 | 89.40 | -| Scale HVT-Ti-4 | 1.39 | 22.12 | 75.23 | 92.30 | -| HVT-S-1 | 2.40 | 22.09 | 78.00 | 93.83 | +| Name | FLOPs (G) | Params (M) | Top-1 Acc. (%) | Top-5 Acc. 
(%) | Model | Log | +| -------------- | --------- | ---------- | -------------- | -------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | +| HVT-Ti-1 | 0.64 | 5.74 | 69.64 | 89.40 | [github](https://github.com/zhuang-group/HVT/releases/download/v1.0/hvt_ti_1.pth) | [log](https://github.com/zhuang-group/HVT/releases/download/v1.0/log_hvt_ti_1) | +| Scale HVT-Ti-4 | 1.39 | 22.12 | 75.23 | 92.30 | [github](https://github.com/zhuang-group/HVT/releases/download/v1.0/scale_hvt_ti_4.pth) | [log](https://github.com/zhuang-group/HVT/releases/download/v1.0/log_scale_hvt_ti_4) | +| HVT-S-1 | 2.40 | 22.09 | 78.00 | 93.83 | - | - | + ### More Pooling Stages with HVT-S -| Name | FLOPs (G) | Params (M) | Top-1 Acc. (%) | Top-5 Acc. (%) | -| ------- | --------- | ---------- | -------------- | -------------- | -| HVT-S-0 | 4.57 | 22.05 | 80.39 | 95.13 | -| HVT-S-1 | 2.40 | 22.09 | 78.00 | 93.83 | -| HVT-S-2 | 1.94 | 22.11 | 77.36 | 93.55 | -| HVT-S-3 | 1.62 | 22.11 | 76.32 | 92.90 | -| HVT-S-4 | 1.39 | 22.12 | 75.23 | 92.30 | +| Name | FLOPs (G) | Params (M) | Top-1 Acc. (%) | Top-5 Acc. 
(%) | Model | Log | +| ------- | --------- | ---------- | -------------- | -------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | +| HVT-S-0 | 4.57 | 22.05 | 80.39 | 95.13 | [github](https://github.com/zhuang-group/HVT/releases/download/v1.0/hvt_s_0.pth) | [log](https://github.com/zhuang-group/HVT/releases/download/v1.0/log_hvt_s_0) | +| HVT-S-1 | 2.40 | 22.09 | 78.00 | 93.83 | - | - | +| HVT-S-2 | 1.94 | 22.11 | 77.36 | 93.55 | [github](https://github.com/zhuang-group/HVT/releases/download/v1.0/hvt_s_2.pth) | [log](https://github.com/zhuang-group/HVT/releases/download/v1.0/log_hvt_s_2) | +| HVT-S-3 | 1.62 | 22.11 | 76.32 | 92.90 | [github](https://github.com/zhuang-group/HVT/releases/download/v1.0/hvt_s_3.pth) | [log](https://github.com/zhuang-group/HVT/releases/download/v1.0/log_hvt_s_3) | +| HVT-S-4 | 1.39 | 22.12 | 75.23 | 92.30 | [github](https://github.com/zhuang-group/HVT/releases/download/v1.0/scale_hvt_ti_4.pth) | [log](https://github.com/zhuang-group/HVT/releases/download/v1.0/log_scale_hvt_ti_4) | + + For CIFAR-100 results, please check out our [paper](https://arxiv.org/abs/2103.10619) for more details. 
diff --git a/config/hvt-s-2.json b/config/hvt-s-2.json new file mode 100644 index 0000000..120e8d9 --- /dev/null +++ b/config/hvt-s-2.json @@ -0,0 +1,15 @@ +{ + "model": "hvt_model", + "batch_size": 128, + "exp_name": "hvt-s-2", + "input_size": 224, + "patch_size": 16, + "num_heads": 6, + "head_dim": 64, + "num_blocks": 12, + "num_workers": 10, + "pool_kernel_size": 3, + "pool_stride": 2, + "pool_block_width": 6, + "weight_decay": 0.025 +} \ No newline at end of file diff --git a/config/hvt-s-3.json b/config/hvt-s-3.json new file mode 100644 index 0000000..e70e81c --- /dev/null +++ b/config/hvt-s-3.json @@ -0,0 +1,15 @@ +{ + "model": "hvt_model", + "batch_size": 128, + "exp_name": "hvt-s-3", + "input_size": 224, + "patch_size": 16, + "num_heads": 6, + "head_dim": 64, + "num_blocks": 12, + "num_workers": 10, + "pool_kernel_size": 3, + "pool_stride": 2, + "pool_block_width": 4, + "weight_decay": 0.025 +} \ No newline at end of file