diff --git a/inference-engine/samples/speech_sample/README.md b/inference-engine/samples/speech_sample/README.md index 293f3240f645e9..7c12a375112adb 100644 --- a/inference-engine/samples/speech_sample/README.md +++ b/inference-engine/samples/speech_sample/README.md @@ -2,7 +2,7 @@ This sample demonstrates how to execute an Asynchronous Inference of acoustic model based on Kaldi\* neural networks and speech feature vectors. -The sample works with Kaldi ARK files only, so it does not cover an end-to-end speech recognition scenario (speech to text), requiring additional preprocessing (feature extraction) to get a feature vector from a speech signal, as well as postprocessing (decoding) to produce text from scores. +The sample works with Kaldi ARK or Numpy* uncompressed NPZ files, so it does not cover an end-to-end speech recognition scenario (speech to text), requiring additional preprocessing (feature extraction) to get a feature vector from a speech signal, as well as postprocessing (decoding) to produce text from scores. Automatic Speech Recognition C++ sample application demonstrates how to use the following Inference Engine C++ API in applications: @@ -27,8 +27,8 @@ Basic Inference Engine API is covered by [Hello Classification C++ sample](../he ## How It Works -Upon the start-up, the application reads command line parameters and loads a Kaldi-trained neural network along with Kaldi ARK speech feature vector file to the Inference Engine plugin. Then it performs inference on all speech utterances stored in the input ARK file. Context-windowed speech frames are processed in batches of 1-8 -frames according to the `-bs` parameter. Batching across utterances is not supported by this sample. When inference is done, the application creates an output ARK file. 
If the `-r` option is given, error +Upon the start-up, the application reads command line parameters, loads a specified model and input data to the Inference Engine plugin, performs synchronous inference on all speech utterances stored in the input file. Context-windowed speech frames are processed in batches of 1-8 +frames according to the `-bs` parameter. Batching across utterances is not supported by this sample. When inference is done, the application creates an output file. If the `-r` option is given, error statistics are provided for each speech utterance as shown above. You can see the explicit description of @@ -43,7 +43,7 @@ Several parameters control neural network quantization. The `-q` flag determines Three modes are supported: - *static* - The first -utterance in the input ARK file is scanned for dynamic range. The scale factor (floating point scalar multiplier) required to scale the maximum input value of the first utterance to 16384 (15 bits) is used +utterance in the input file is scanned for dynamic range. The scale factor (floating point scalar multiplier) required to scale the maximum input value of the first utterance to 16384 (15 bits) is used for all subsequent inputs. The neural network is quantized to accommodate the scaled input dynamic range. - *dynamic* - The user may specify a scale factor via the `-sf` flag that will be used for static quantization. - *user-defined* - The scale factor for each input batch is computed @@ -99,17 +99,17 @@ speech_sample [OPTION] Options: -h Print a usage message. - -i "" Required. Paths to .ark files. Example of usage: or . + -i "" Required. Paths to input files. Example of usage: or or . -m "" Required. Path to an .xml file with a trained model (required if -rg is missing). - -o "" Optional. Output file name to save ark scores. + -o "" Optional. Output file name to save scores. Example of usage: or -d "" Optional. Specify a target device to infer on. 
CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. The sample will look for a suitable plugin for device specified. -pc Optional. Enables per-layer performance report. - -q "" Optional. Input quantization mode: "static" (default), "dynamic", or "user" (use with -sf). + -q "" Optional. Input quantization mode: static (default), dynamic, or user (use with -sf). -qb "" Optional. Weight bits for quantization: 8 or 16 (default) -sf "" Optional. User-specified input scale factor for quantization (use with -q user). If the network contains multiple inputs, provide scale factors by separating them with commas. -bs "" Optional. Batch size 1-8 (default 1) - -r "" Optional. Read reference score .ark file and compare scores. + -r "" Optional. Read reference score file and compare scores. Example of usage: or -rg "" Read GNA model from file using path/filename provided (required if -m is missing). -wg "" Optional. Write GNA model to file using path/filename provided. -we "" Optional. Write GNA embedded model to file using path/filename provided. @@ -118,10 +118,9 @@ Options: If you use the cw_l or cw_r flag, then batch size and nthreads arguments are ignored. -cw_r "" Optional. Number of frames for right context windows (default is 0). Works only with context window networks. If you use the cw_r or cw_l flag, then batch size and nthreads arguments are ignored. - -oname "" Optional. Layer names for output blobs. The names are separated with ",". Allows to change the order of output layers for -o flag. - Example: Output1:port,Output2:port. - -iname "" Optional. Layer names for input blobs. The names are separated with ",". Allows to change the order of input layers for -i flag. - Example: Input1,Input2 + -oname "" Optional. Layer names for output blobs. 
The names are separated with "," Example: Output1:port,Output2:port + -iname "" Optional. Layer names for input blobs. The names are separated with "," Example: Input1,Input2 + -pwl_me "" Optional. The maximum percent of error for PWL function. The value must be in <0, 100> range. The default value is 1.0. Available target devices: @@ -169,7 +168,7 @@ All of them can be downloaded from [https://storage.openvinotoolkit.org/models_c ## Sample Output -The acoustic log likelihood sequences for all utterances are stored in the Kaldi ARK file, `scores.ark`. If the `-r` option is used, a report on the statistical score error is generated for each utterance such as +The acoustic log likelihood sequences for all utterances are stored in the output file, for example `scores.ark` or `scores.npz`. If the `-r` option is used, a report on the statistical score error is generated for each utterance such as the following: ```sh diff --git a/inference-engine/samples/speech_sample/fileutils.cpp b/inference-engine/samples/speech_sample/fileutils.cpp index 854f2b7c362e6e..9eac5168293516 100644 --- a/inference-engine/samples/speech_sample/fileutils.cpp +++ b/inference-engine/samples/speech_sample/fileutils.cpp @@ -31,8 +31,7 @@ void ArkFile::GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uin } in_file.close(); } else { - fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName); - exit(-1); + throw std::runtime_error(std::string("Failed to open %s for reading in GetFileInfo()!\n") + fileName); } if (ptrNumArrays != NULL) @@ -76,8 +75,7 @@ void ArkFile::LoadFile(const char* fileName, uint32_t arrayIndex, std::string& p } in_file.close(); } else { - fprintf(stderr, "Failed to open %s for reading in GetKaldiArkInfo()!\n", fileName); - exit(-1); + throw std::runtime_error(std::string("Failed to open %s for reading in LoadFile()!\n") + fileName); } *ptrNumBytesPerElement = sizeof(float); @@ -100,7 +98,7 @@ void ArkFile::SaveFile(const char* fileName, bool 
shouldAppend, std::string name out_file.write(reinterpret_cast(ptrMemory), numRows * numColumns * sizeof(float)); out_file.close(); } else { - throw std::runtime_error(std::string("Failed to open %s for writing in SaveKaldiArkArray()!\n") + fileName); + throw std::runtime_error(std::string("Failed to open %s for writing in SaveFile()!\n") + fileName); } } diff --git a/inference-engine/samples/speech_sample/speech_sample.hpp b/inference-engine/samples/speech_sample/speech_sample.hpp index 9e8358e4baea97..cafe4db5c61758 100644 --- a/inference-engine/samples/speech_sample/speech_sample.hpp +++ b/inference-engine/samples/speech_sample/speech_sample.hpp @@ -14,7 +14,7 @@ static const char help_message[] = "Print a usage message."; /// @brief message for images argument -static const char input_message[] = "Required. Paths to .ark files. Example of usage: or ."; +static const char input_message[] = "Required. Paths to input files. Example of usage: or or ."; /// @brief message for model argument static const char model_message[] = "Required. Path to an .xml file with a trained model (required if -rg is missing)."; @@ -49,10 +49,10 @@ static const char custom_cpu_library_message[] = "Required for CPU plugin custom "Absolute path to a shared library with the kernels implementations."; /// @brief message for score output argument -static const char output_message[] = "Optional. Output file name to save ark scores."; +static const char output_message[] = "Optional. Output file name to save scores. Example of usage: or "; /// @brief message for reference score file argument -static const char reference_score_message[] = "Optional. Read reference score .ark file and compare scores."; +static const char reference_score_message[] = "Optional. Read reference score file and compare scores. 
Example of usage: or "; /// @brief message for read GNA model argument static const char read_gna_model_message[] = "Read GNA model from file using path/filename provided (required if -m is missing)."; diff --git a/thirdparty/cnpy/CMakeLists.txt b/thirdparty/cnpy/CMakeLists.txt index 1e90f758d073f5..5571cd5f7857c7 100644 --- a/thirdparty/cnpy/CMakeLists.txt +++ b/thirdparty/cnpy/CMakeLists.txt @@ -5,12 +5,10 @@ endif(COMMAND cmake_policy) project(CNPY) -set(BUILD_SHARED_LIBS OFF) set(TARGET_NAME "cnpy") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") add_library(cnpy STATIC "cnpy.cpp") -if(NOT WIN32) +if(NOT ${CMAKE_CXX_COMPILER_ID} STREQUAL "MSVC") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-all") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-all") target_compile_options(${TARGET_NAME} PUBLIC -Wno-unused-variable) diff --git a/thirdparty/zlib/CMakeLists.txt b/thirdparty/zlib/CMakeLists.txt index e77a00ff50bfab..b24d4abf323cd0 100644 --- a/thirdparty/zlib/CMakeLists.txt +++ b/thirdparty/zlib/CMakeLists.txt @@ -1,23 +1,16 @@ PROJECT(zlib) if(NOT WIN32) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-all") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-all") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-all") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-all") endif() -if (MSVC) - # Build with multiple processes - add_definitions(/MP) - # MSVC warning suppressions - add_definitions( - /wd4996 # The compiler encountered a deprecated declaration. 
- ) -endif (MSVC) +if(CMAKE_C_COMPILER_ID STREQUAL "MSVC") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MP /wd4996 /W3") +endif() -set(BUILD_SHARED_LIBS OFF) set(TARGET_NAME "zlib") -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/zlib") set(lib_srcs zlib/adler32.c @@ -51,17 +44,9 @@ set(lib_hdrs set(lib_ext_hdrs "zlib/zlib.h" "zlib/zconf.h") add_library(${TARGET_NAME} STATIC ${lib_srcs} ${lib_hdrs} ${lib_ext_hdrs}) -target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/zlib" - "${CMAKE_CURRENT_SOURCE_DIR}/zlib/..") -if(MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W3") -endif() -if(UNIX) - if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") - endif() -endif() +target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/zlib" + "${CMAKE_CURRENT_SOURCE_DIR}/zlib/..") set_target_properties(zlib PROPERTIES FOLDER thirdparty) \ No newline at end of file