Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ build
build*
3rdparty/tensorflow
app/AccuracyImgNet/imgs
benchmark_results/
docs/
docs/input
docs/mnist
1 change: 1 addition & 0 deletions app/Graph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ target_include_directories(BuildGraph PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/Json/i

add_executable(Graph_Build graph_build.cpp)
target_link_libraries(Graph_Build BuildGraph)
target_link_libraries(Graph_Build graphT_lib)

add_executable(ACC acc_check.cpp)
target_link_libraries(ACC BuildGraph)
Expand Down
53 changes: 52 additions & 1 deletion app/Graph/graph_build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,55 @@
#include <unordered_map>

#include "build.hpp"
#include "graph_transformations/graph_transformations.hpp"
#include "layers_fused/ConvRelu.hpp"

namespace fs = std::filesystem;
using namespace it_lab_ai;

namespace {

// Fusion strategies selectable via the --fusion command-line flag.
enum class FusionMode { kOff, kPostops, kConvRelu };

// Translates a --fusion command-line value into a FusionMode.
// Throws std::invalid_argument for any unrecognized spelling.
FusionMode parse_fusion_mode(const std::string& value) {
  static const std::unordered_map<std::string, FusionMode> kModeByName = {
      {"off", FusionMode::kOff},
      {"postops", FusionMode::kPostops},
      {"convrelu", FusionMode::kConvRelu},
  };
  const auto found = kModeByName.find(value);
  if (found == kModeByName.end()) {
    throw std::invalid_argument("Unknown fusion mode: " + value);
  }
  return found->second;
}

// Rewrites every Conv -> Relu pair in `graph` into a single fused
// ConvReluLayer, then replaces `graph` with the transformed version.
// `output` and `options` are forwarded to the subgraph-replacement pass.
// Throws std::invalid_argument for the oneDNN backend, which is not
// supported by this fusion path.
void apply_conv_relu_fusion(Graph& graph, Tensor& output,
                            const RuntimeOptions& options) {
  if (options.backend == Backend::kOneDnn) {
    throw std::invalid_argument(
        "convrelu fusion is not supported with oneDNN backend");
  }

  // Pattern to match: a convolution feeding an element-wise relu.
  // The tensor fed to the pattern graph is only a placeholder.
  auto pattern_conv = std::make_shared<ConvolutionalLayer>();
  auto pattern_relu = std::make_shared<EWLayer>("relu");
  Tensor placeholder = make_tensor(std::vector<int>({0}));
  Graph pattern;
  pattern.setInput(pattern_conv, placeholder);
  pattern.makeConnection(pattern_conv, pattern_relu);

  // Substitute each match with the fused layer and adopt the new graph.
  auto replacement = std::make_shared<ConvReluLayer>();
  Graph rewritten;
  changed_subgraphs(graph, pattern, replacement, rewritten, output, options);
  graph = std::move(rewritten);
}

} // namespace

int main(int argc, char* argv[]) {
std::string model_name = "alexnet_mnist";
RuntimeOptions options;
FusionMode fusion_mode = FusionMode::kPostops;

for (int i = 1; i < argc; ++i) {
if (std::string(argv[i]) == "--model" && i + 1 < argc) {
Expand Down Expand Up @@ -47,6 +89,8 @@ int main(int argc, char* argv[]) {
}
} else if (std::string(argv[i]) == "--threads" && i + 1 < argc) {
options.threads = std::stoi(argv[++i]);
} else if (std::string(argv[i]) == "--fusion" && i + 1 < argc) {
fusion_mode = parse_fusion_mode(argv[++i]);
}
}

Expand Down Expand Up @@ -92,7 +136,11 @@ int main(int argc, char* argv[]) {
std::vector<float> vec(75, 3);
it_lab_ai::Tensor output = it_lab_ai::make_tensor(vec, sh1);
Graph graph;
build_graph_linear(graph, input, output, options, true);
build_graph_linear(graph, input, output, options, true,
fusion_mode == FusionMode::kPostops);
if (fusion_mode == FusionMode::kConvRelu) {
apply_conv_relu_fusion(graph, output, options);
}

std::cout << "Starting inference..." << '\n';
try {
Expand Down Expand Up @@ -133,6 +181,9 @@ int main(int argc, char* argv[]) {

Graph graph;
build_graph(graph, input, output, json_path, options, false);
if (fusion_mode == FusionMode::kConvRelu) {
apply_conv_relu_fusion(graph, output, options);
}

std::cout << "Starting inference..." << '\n';
try {
Expand Down
60 changes: 60 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Model Performance Benchmark

`model_performance.py` benchmarks the existing `Graph_Build` executable for all
target networks:

- `alexnet_mnist`
- `googlenet`
- `densenet`
- `resnet`
- `yolo`

It measures wall time and records an RSS memory timeline for two stages:

- `compile`: process start until `Graph_Build` prints `Starting inference...`
- `inference`: `Starting inference...` until `Inference completed successfully.`

The benchmark does not modify C++ code. It reads the executable output live,
samples process memory while the command is running, stores the full RSS sample
series, and writes a memory plot for every measured run.

Install `matplotlib` to generate memory plots. Install `psutil` to measure RSS
for the full process tree on every platform. Without `psutil`, Linux uses
`/proc`, while macOS and Windows use parent-process RSS fallbacks.

## Usage

Build the project first:

```bash
cmake -S . -B build
cmake --build build --target Graph_Build --parallel
```

Run the default benchmark over every model with available JSON/input assets:

```bash
python3 benchmarks/model_performance.py
```

Run selected models and variants:

```bash
python3 benchmarks/model_performance.py \
--model googlenet,resnet \
--variant target \
--repeat 3 \
--warmup 1
```

The JSON report includes `memory_samples` for every run. PNG plots are written
to `benchmark_results/memory_plots` by default. Use `--samples-csv-out` to export
the memory timeline to CSV and `--plots-dir` to choose another plot directory.

Use `--variant target` for the full target matrix: every supported parallel
backend with fusion off and on, plus oneDNN with fusion off and on. When fusion
is on, the naive/parallel backends use the fused `Conv+Relu` layer, while
oneDNN uses its post-ops mode.

Use `--strict-assets` to fail when a model JSON or input image directory is
missing instead of skipping that model.
Loading
Loading