{
"ServerlessLLM: Locality-Enhanced Serverless Inference for Large Language Models": {
"source": "DBLP",
"url": "https://doi.org/10.48550/arXiv.2401.14351"
},
"CoActo: CoActive Neural Network Inference Offloading with Fine-grained and Concurrent Execution": {
"source": "DBLP",
"url": "https://doi.org/10.1145/3643832.3661885"
},
"Optimizing Dynamic Neural Networks with Brainstorm": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/osdi23/presentation/cui"
},
"No Provisioned Concurrency: Fast RDMA-codesigned Remote Fork for Serverless Computing": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/osdi23/presentation/wei-rdma"
},
"RainbowCake: Mitigating Cold-starts in Serverless with Layer-wise Container Caching and Sharing": {
"source": "DBLP",
"url": "https://doi.org/10.1145/3617232.3624871"
},
"Zeus: Understanding and Optimizing GPU Energy Consumption of DNN Training": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/nsdi23/presentation/you"
},
"PockEngine: Sparse and Efficient Fine-tuning in a Pocket": {
"source": "DBLP",
"url": "https://doi.org/10.1145/3613424.3614307"
},
"Liquid: Intelligent Resource Estimation and Network-Efficient Scheduling for Deep Learning Jobs on DistributeGPU Clusters": null,
"Astrea: Auto-Serverless Analytics Towards Cost-Efficiency and QoS-Awareness": {
"source": "DBLP",
"url": "https://doi.org/10.1109/TPDS.2022.3172069"
},
"Maximizing the Utilization of GPUs Used by Cloud Gaming through Adaptive Co-location with Combo": {
"source": "DBLP",
"url": "https://doi.org/10.1145/3620678.3624660"
},
"Bolt: Bridging the Gap between Auto-tuners and Hardware-native Performance": {
"source": "DBLP",
"url": "https://proceedings.mlsys.org/paper_files/paper/2022/hash/1f8053a67ec8e0b57455713cefdd8218-Abstract.html"
},
"SpaceEvo: Hardware-Friendly Search Space Design for Efficient INT8 Inference": {
"source": "OpenAlex",
"url": "https://doi.org/10.48550/arxiv.2303.08308"
},
"Amanda: Unified Instrumentation Framework for Deep Neural Networks": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3617232.3624864"
},
"Golgi: Performance-Aware, Resource-Efficient Function Scheduling for Serverless Computing": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3620678.3624645"
},
"Cost-effective On-device Continual Learning over Memory Hierarchy with Miro": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3570361.3613297"
},
"DepGraph: Towards Any Structural Pruning": {
"source": "OpenAlex",
"url": "https://doi.org/10.1109/cvpr52729.2023.01544"
},
"NN-Stretch: Automatic Neural Network Branching for Parallel Inference on Heterogeneous Multi-Processors": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3581791.3596870"
},
"LUT-NN: Empower Efficient Neural Network Inference with Centroid Learning and Table Lookup": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3570361.3613285"
},
"AQUATOPE: QoS-and-Uncertainty-Aware Resource Management for Multi-stage Serverless Workflows": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3567955.3567960"
},
"ElasticTrainer: Speeding Up On-Device Training with Runtime Elastic Tensor Selection": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3581791.3596852"
},
"INFless: A Native Serverless System for Low-Latency, High-Throughput Inference": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3503222.3507709"
},
"ConvReLU++: Reference-based Lossless Acceleration of Conv-ReLU Operations on Mobile CPU": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3581791.3596831"
},
"BeeHive: Sub-second Elasticity for Web Services with Semi-FaaS Execution": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3575693.3575752"
},
"Understanding and Optimizing Deep Learning Cold-Start Latency on Edge Devices": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.48550/arXiv.2206.07446"
},
"ModelKeeper: Accelerating DNN Training via Automated Training Warmup": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/nsdi23/presentation/lai-fan"
},
"AStitch: Enabling a New Multi-dimensional Optimization Space for Memory-Intensive ML Training and Inferencon Modern SIMT Architectures": null,
"Hyperion: A Generic and Distributed Mobile Offloading Framework on OpenCL": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3560905.3568511"
},
"ROLLER: Fast and Efficient Tensor Compilation for Deep Learning": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/osdi22/presentation/zhu"
},
"TLP: A Deep Learning-Based Cost Model for Tensor Program Tuning": {
"source": "OpenAlex",
"url": "https://doi.org/10.1145/3575693.3575737"
},
"BlastNet: Exploiting Duo-Blocks for Cross-Processor Real-Time DNN Inference": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3560905.3568520"
},
"Melon: Breaking the Memory Wall for Resource-Efficient On-Device Machine Learning": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3498361.3538928"
},
"Romou: rapidly generate high-performance tensor kernels for mobile GPUs": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3495243.3517020"
},
"Real-time neural network inference on extremely weak devices: agile offloading with explainable AI": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3495243.3560551"
},
"Band: coordinated multi-DNN inference on heterogeneous mobile processors": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3498361.3538948"
},
"CoDL: Efficient CPU-GPU Co-Execution for Deep Learning Inference on Mobile Devices": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3498361.3538932"
},
"nn-Meter: Towards Accurate Latency Prediction of Deep-Learning Model Inference on Diverse Edge Devices": {
"source": "Semantic Scholar",
"url": "https://doi.org/10.1145/3458864.3467882"
},
"Cachew: Machine Learning Input Data Processing as a Service": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/atc22/presentation/graur"
},
"Campo: Cost-Aware Performance Optimization for Mixed-Precision Neural Network Training": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/atc22/presentation/he"
},
"Soter: Guarding Black-box Inference for General Neural Networks at the Edge": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/atc22/presentation/shen"
},
"Microsecond-scale Preemption for Concurrent GPU-accelerated DNN Inference": null,
"Tetris: Memory-efficient Serverless Inference through Tensor Sharing": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/atc22/presentation/li-jie"
},
"Serving Heterogeneous Machine Learning Models on Multi-GPU Servers with Spatio-Temporal Sharing": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/atc22/presentation/choi-seungbeom"
},
"Doing More with Less: Orchestrating Serverless Applications without an Orchestrator": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/nsdi23/presentation/liu-david"
},
"nnPerf: A Real-time On-device Tool Profiling DNN Inference on Mobile Platforms": null,
"Can't Be Late: Optimizing Spot Instance Savings under Deadlines": {
"source": "DBLP",
"url": "https://www.usenix.org/conference/nsdi24/presentation/wu-zhanghao"
}
}