// tensorNet.cpp
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <vector>
#include "tensorNet.h"
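
// Build a TensorRT engine from a Caffe deploy/model file pair and serialize
// it into gieModelStream for later use by createInference().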
void TensorNet::caffeToTRTModel(const std::string& deployFile,
                                const std::string& modelFile,
                                const std::vector<std::string>& outputs,
                                unsigned int maxBatchSize)
{
    IBuilder* builder = createInferBuilder(gLogger);
    INetworkDefinition* network = builder->createNetwork();

    ICaffeParser* parser = createCaffeParser();
    parser->setPluginFactory(&pluginFactory);

    // Parse the weights in FP16 when the platform has fast FP16 support,
    // otherwise fall back to FP32.
    bool useFp16 = builder->platformHasFastFp16();
    DataType modelDataType = useFp16 ? DataType::kHALF : DataType::kFLOAT;

    const IBlobNameToTensor* blobNameToTensor = parser->parse(deployFile.c_str(),
                                                              modelFile.c_str(),
                                                              *network,
                                                              modelDataType);
    assert(blobNameToTensor != nullptr);

    // The Caffe parser leaves all tensors unmarked; mark each requested blob
    // as a network output so it survives optimization.
    for (auto& s : outputs) network->markOutput(*blobNameToTensor->find(s.c_str()));

    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(16 << 20);    // 16 MiB of scratch workspace
    if (useFp16) builder->setHalf2Mode(true);

    ICudaEngine* engine = builder->buildCudaEngine(*network);
    assert(engine);

    // Once the engine is built, the network and parser are no longer needed.
    network->destroy();
    parser->destroy();

    // Serialize the engine so createInference() can rebuild it without reparsing.
    gieModelStream = engine->serialize();
    engine->destroy();
    builder->destroy();
    pluginFactory.destroyPlugin();
    shutdownProtobufLibrary();
}
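
// Deserialize the engine produced by caffeToTRTModel() and print every
// binding so the input/output layout is visible at startup.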
void TensorNet::createInference()
{
    infer = createInferRuntime(gLogger);
    engine = infer->deserializeCudaEngine(gieModelStream->data(), gieModelStream->size(), &pluginFactory);

    printf("Bindings after deserializing:\n");
    for (int bi = 0; bi < engine->getNbBindings(); bi++) {
        if (engine->bindingIsInput(bi))
            printf("Binding %d (%s): Input.\n", bi, engine->getBindingName(bi));
        else
            printf("Binding %d (%s): Output.\n", bi, engine->getBindingName(bi));
    }
}
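
// Run one synchronous inference pass. buffers must hold one device pointer
// per engine binding, in binding order.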
void TensorNet::imageInference(void** buffers, int nbBuffer, int batchSize)
{
    assert(engine->getNbBindings() == nbBuffer);

    IExecutionContext* context = engine->createExecutionContext();
    context->setProfiler(&gProfiler);
    context->execute(batchSize, buffers);
    context->destroy();
}
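
// Benchmark helper: allocates a device buffer per binding, zero-fills the
// input, runs the engine 'iteration' times under the profiler, then frees
// the buffers.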
void TensorNet::timeInference(int iteration, int batchSize)
{
    int inputIdx = 0;
    size_t inputSize = 0;

    // Allocate a device buffer for every binding; remember the input binding
    // so it can be zero-filled before timing.
    std::vector<void*> buffers(engine->getNbBindings());
    for (int b = 0; b < engine->getNbBindings(); b++) {
        DimsCHW dims = static_cast<DimsCHW&&>(engine->getBindingDimensions(b));
        size_t size = batchSize * dims.c() * dims.h() * dims.w() * sizeof(float);
        CHECK(cudaMalloc(&buffers[b], size));

        if (engine->bindingIsInput(b)) {
            inputIdx = b;
            inputSize = size;
        }
    }

    IExecutionContext* context = engine->createExecutionContext();
    context->setProfiler(&gProfiler);

    CHECK(cudaMemset(buffers[inputIdx], 0, inputSize));
    for (int i = 0; i < iteration; i++) context->execute(batchSize, buffers.data());

    context->destroy();
    for (int b = 0; b < engine->getNbBindings(); b++) CHECK(cudaFree(buffers[b]));
}
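
// Look up the CHW dimensions of a binding by name; returns {0,0,0} when no
// binding matches.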
DimsCHW TensorNet::getTensorDims(const char* name)
{
    for (int b = 0; b < engine->getNbBindings(); b++) {
        if (!strcmp(name, engine->getBindingName(b)))
            return static_cast<DimsCHW&&>(engine->getBindingDimensions(b));
    }
    return DimsCHW{0, 0, 0};
}
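
// Print the per-layer timings collected by gProfiler over 'iteration' runs.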
void TensorNet::printTimes(int iteration)
{
    gProfiler.printLayerTimes(iteration);
}
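
// Release the plugin, engine, and runtime resources held by this object.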
void TensorNet::destroy()
{
    pluginFactory.destroyPlugin();
    engine->destroy();
    infer->destroy();
}
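
// Minimal usage sketch (assumed driver code, not part of this file; the
// file names and the "prob" output blob are illustrative):
//
//   TensorNet net;
//   net.caffeToTRTModel("deploy.prototxt", "weights.caffemodel", {"prob"}, 1);
//   net.createInference();
//   net.timeInference(/*iteration=*/10, /*batchSize=*/1);
//   net.printTimes(10);
//   net.destroy();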