StandardGpuResources.h
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD+Patents license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include "GpuResources.h"
#include "utils/StackDeviceMemory.h"
#include "utils/DeviceUtils.h"

#include <memory>
#include <unordered_map>
#include <vector>

namespace faiss { namespace gpu {
/// Default implementation of GpuResources that allocates a cuBLAS
/// handle and 2 streams for use per device, as well as temporary memory
class StandardGpuResources : public GpuResources {
 public:
  StandardGpuResources();

  ~StandardGpuResources() override;

  /// Disable allocation of temporary memory; all temporary memory
  /// requests will call cudaMalloc / cudaFree at the point of use
  void noTempMemory();

  /// Specify that we wish to use a certain fixed size of memory on
  /// all devices as temporary memory
  void setTempMemory(size_t size);

  /// Specify that we wish to use a certain fraction of memory on
  /// all devices as temporary memory
  void setTempMemoryFraction(float fraction);

  /// Set amount of pinned memory to allocate, for async GPU <-> CPU
  /// transfers
  void setPinnedMemory(size_t size);

  /// Called to change the stream for work ordering
  void setDefaultStream(int device, cudaStream_t stream);

  /// Called to change the work ordering streams to the null stream
  /// for all devices
  void setDefaultNullStreamAllDevices();

  /// Enable or disable the warning about not having enough temporary memory
  /// when cudaMalloc gets called
  void setCudaMallocWarning(bool b);
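  // Illustrative configuration sketch; `myStream` stands in for a hypothetical
  // user-created cudaStream_t:
  //
  //   StandardGpuResources res;
  //   res.setTempMemoryFraction(0.1f);        // reserve ~10% of each device's memory
  //   res.setPinnedMemory(64 * 1024 * 1024);  // 64 MiB of pinned staging memory
  //   res.setDefaultStream(0, myStream);      // order device-0 work on myStream
  //
  //   // or skip the up-front scratch reservation entirely:
  //   res.noTempMemory();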
 public:
  /// Internal system calls
  void initializeForDevice(int device) override;

  cublasHandle_t getBlasHandle(int device) override;

  cudaStream_t getDefaultStream(int device) override;

  std::vector<cudaStream_t> getAlternateStreams(int device) override;

  DeviceMemory& getMemoryManager(int device) override;

  std::pair<void*, size_t> getPinnedMemory() override;

  cudaStream_t getAsyncCopyStream(int device) override;
 private:
  /// Our default stream that work is ordered on, one per each device
  std::unordered_map<int, cudaStream_t> defaultStreams_;

  /// This contains particular streams as set by the user for
  /// ordering, if any
  std::unordered_map<int, cudaStream_t> userDefaultStreams_;

  /// Other streams we can use, per each device
  std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;

  /// Async copy stream to use for GPU <-> CPU pinned memory copies
  std::unordered_map<int, cudaStream_t> asyncCopyStreams_;

  /// cuBLAS handle for each device
  std::unordered_map<int, cublasHandle_t> blasHandles_;

  /// Temporary memory provider, per each device
  std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;

  /// Pinned memory allocation for use with this GPU
  void* pinnedMemAlloc_;
  size_t pinnedMemAllocSize_;

  /// By default, we reserve this fraction of memory on all devices
  float tempMemFraction_;

  /// Another option is to use a specified amount of memory on all
  /// devices
  size_t tempMemSize_;

  /// Whether we look at tempMemFraction_ or tempMemSize_
  bool useFraction_;

  /// Amount of pinned memory we should allocate
  size_t pinnedMemSize_;

  /// Whether or not a warning upon cudaMalloc is generated
  bool cudaMallocWarning_;
};
} } // namespace
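A minimal usage sketch, assuming the GpuIndexFlatL2 index declared in GpuIndexFlat.h from the same Faiss GPU module and its standard add/search signatures; the resources object must outlive any index constructed against it:

#include "StandardGpuResources.h"
#include "GpuIndexFlat.h"

#include <vector>

void exampleSearch(int dim, long n, const float* vectors,
                   long nq, const float* queries) {
  faiss::gpu::StandardGpuResources res;
  res.setTempMemory(512 * 1024 * 1024);    // fixed 512 MiB of scratch per device
  res.setPinnedMemory(64 * 1024 * 1024);   // pinned memory for async GPU <-> CPU copies

  // The index keeps a pointer to res, so res must remain alive while it is used.
  faiss::gpu::GpuIndexFlatL2 index(&res, dim);
  index.add(n, vectors);

  const long k = 5;
  std::vector<float> distances(nq * k);
  std::vector<faiss::Index::idx_t> labels(nq * k);
  index.search(nq, queries, k, distances.data(), labels.data());
}

Using setTempMemory here reserves a fixed scratch region up front; calling noTempMemory() instead would make every temporary allocation fall back to cudaMalloc / cudaFree at the point of use, as the comments above note.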