-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMakefile
128 lines (109 loc) · 4.61 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Toolchain and flag configuration. Every value below can be overridden from
# the command line or the environment, e.g. `make CXX=g++ VG=`.

# GNU Make predefines CXX (as g++), so a plain `CXX ?= clang++` never takes
# effect. Only replace the value while it is still Make's built-in default;
# values from the environment or the command line are left alone.
ifeq ($(origin CXX),default)
CXX = clang++
endif
INCLUDES = -I/usr/include/hdf5/serial
LIBS = -lhdf5_cpp -lhdf5_hl_cpp -lhdf5_serial -ljsoncpp -lblas
# Comments sit on their own lines: an inline `# ...` after a value would leave
# the whitespace before it as part of the variable.
# Comment this out to disable openmp
OPENMP = -fopenmp -DUSE_OPENMP
# Comment this out to disable openblas
BLAS = -DUSE_BLAS
# Debugging alternative to -O2: -fno-inline-functions -Og
CXXFLAGS += $(OPENMP) $(BLAS) -std=c++11 -Wall -g -O2 -march=native
# Command prefix used to launch the test binaries; `make VG=` runs them natively.
VG ?= valgrind --leak-check=yes
# Build tests/lucs.cc against GoogleTest and run the resulting binary.
# The recipe writes ./lucs-gtest, never a file called `gtest`, so the target is
# phony. Uses $(CXX) instead of a hard-coded g++ so the compiler choice applies
# here as well.
.PHONY: gtest
gtest:
	$(CXX) tests/lucs.cc -I./include -lgtest -lpthread -o lucs-gtest
	./lucs-gtest
# Aggregate test suites. These names are commands rather than files, so they
# are all declared phony (`test` is kept in the list for backward compatibility).
# -- e.g. make unittest >/dev/null
.PHONY: test unittest layertest graphtest
unittest: ut_tensor ut_blob ut_layer ut_graph
layertest: test_layer_lineq test_layer_mnist_cls test_layer_mnist_reg
# NOTE(review): this used to list test_graph_mnist_cls2, for which this
# Makefile has no rule (so `make graphtest` could not succeed). It now runs the
# existing test_graph_mnist_reg instead -- confirm this is the intended suite.
graphtest: test_graph_mnist_cls test_graph_mnist_reg
# -- Fake dataset generator
# Each recipe runs gen-demoh5.py, which is expected to write the target file.
# The script is listed as a prerequisite so the data is regenerated whenever
# the generator itself changes.
demo.h5: gen-demoh5.py
	python3 $<

mnist.fake.h5: gen-demoh5.py
	python3 $< mnist
# -- Unit Tests
# For every ut_<name> target:
#   1. run leichtut.py on <name>.hpp (expected to emit <name>.hpp_ut.cc),
#   2. compile that file into ut_<name>.elf,
#   3. run the binary under $(VG).
# $(LIBS) comes after the source file: linkers that resolve symbols left to
# right (or default to --as-needed) drop libraries named before their users.
UNIT_TESTS := ut_tensor ut_blob ut_layer ut_graph

# None of these recipes creates a file named after its target.
.PHONY: $(UNIT_TESTS)

# Static pattern rule; $* is the stem, i.e. the header name without `.hpp`.
$(UNIT_TESTS): ut_%: %.hpp
	python3 leichtut.py $*.hpp
	$(CXX) $(INCLUDES) $(CXXFLAGS) -o $@.elf $*.hpp_ut.cc $(LIBS)
	$(VG) ./$@.elf

# The tensor unit test additionally needs the generated demo dataset.
ut_tensor: demo.h5
# -- Feel the horror in the performance difference
# Compiles bench_tensor.cc into bench_tensor.elf; the binary is only built,
# not run. $(LIBS) already ends with -lblas and is placed after the source so
# the linker sees the libraries after the code that needs them.
.PHONY: bench_tensor
bench_tensor:
	$(CXX) $(INCLUDES) $(CXXFLAGS) -o $@.elf $@.cc $(LIBS)
# -- Layer-level Tests
# Each target compiles <target>.cc into <target>.elf and runs it under $(VG).
# Status recorded by the original author:
#   test_layer_lineq      OK
#   test_layer_mnist_reg  OK
#   test_layer_mnist_cls  OK
#   test_layer_lenet_cls  FIXME
LAYER_TESTS := test_layer_lineq test_layer_mnist_reg test_layer_mnist_cls \
               test_layer_lenet_cls

# The recipes produce <target>.elf, never a file named <target>.
.PHONY: $(LAYER_TESTS)

# $(LIBS) follows the source so libraries are not discarded by linkers that
# resolve symbols in command-line order.
$(LAYER_TESTS): %: %.cc
	$(CXX) $(INCLUDES) $(CXXFLAGS) -o $@.elf $< $(LIBS)
	$(VG) ./$@.elf
# -- Graph-level Tests
# Each target compiles <target>.cc into <target>.elf and runs it under $(VG).
# Both tests are marked by the original author as
# "working, but FIXME: Memory issue".
GRAPH_TESTS := test_graph_mnist_cls test_graph_mnist_reg

# The recipes produce <target>.elf, never a file named <target>.
.PHONY: $(GRAPH_TESTS)

# $(LIBS) follows the source so libraries are not discarded by linkers that
# resolve symbols in command-line order.
$(GRAPH_TESTS): %: %.cc
	$(CXX) $(INCLUDES) $(CXXFLAGS) -o $@.elf $< $(LIBS)
	$(VG) ./$@.elf
# -- Python Binding
# `swig` builds the extension through setup.py and copies the artefacts next to
# the sources; `pyunit` rebuilds the binding and then runs its Python tests.
.PHONY: swig pyunit
swig:
	CFLAGS="$(CXXFLAGS)" python3 setup.py build
	# The build directory name encodes platform and Python version
	# (e.g. lib.linux-x86_64-3.6), so match it with a glob instead of
	# hard-coding one interpreter. Assumes a single build/lib.* directory;
	# run `make clean` first if several have accumulated.
	# The leading `-` keeps going when leicht.py is not in the build tree.
	-cp build/lib.*/leicht.py leicht.py
	cp build/lib.*/leicht.*.so _leicht.so

pyunit: swig
	python3 test_pyunit_tensor.py -v
# -- Benchmarks
# Compiles the same program with a series of optimisation flag sets and
# profiles each build, so the effect of -O levels, -march=native and -flto can
# be compared.
.PHONY: benchmark
BASEFLAG= -std=c++11 -Wall -fopenmp -DUSE_OPENMP
# Profiler prefix; you can use "time" instead of "perf": make benchmark PERF=time
PERF ?= sudo perf stat

# $(call bench_run,<flags>): rebuild benchmark.elf with <flags> and profile it.
# $(LIBS) follows the source so libraries are not discarded by linkers that
# resolve symbols in command-line order.
define bench_run
$(CXX) $(INCLUDES) $(BASEFLAG) $(1) -o benchmark.elf test_benchmark.cc $(LIBS)
$(PERF) ./benchmark.elf > /dev/null
endef

benchmark: mnist.fake.h5
	# change the code to use the fake dataset, and change iterations
	# NOTE(review): test_mnist_cls.cc is not referenced by any other rule in
	# this Makefile -- confirm the file name.
	cp test_mnist_cls.cc test_benchmark.cc
	# Point the copy at the generated prerequisite mnist.fake.h5 (previously
	# the name was rewritten to `mnist.fake`, with an unescaped dot).
	sed -i -e 's/mnist\.h5/mnist.fake.h5/g' test_benchmark.cc
	@echo
	# warm up and report nothing, the kernel may cache something
	$(CXX) $(INCLUDES) $(BASEFLAG) -O0 -o benchmark.elf test_benchmark.cc $(LIBS)
	./benchmark.elf > /dev/null
	@echo
	# compile with -O0 and test
	$(call bench_run,-O0)
	# compile with -O0 -march=native and test
	$(call bench_run,-O0 -march=native)
	# compile with -O0 -march=native -flto and test
	$(call bench_run,-O0 -march=native -flto)
	# compile with -O2 and test
	$(call bench_run,-O2)
	# compile with -O2 -march=native and test
	$(call bench_run,-O2 -march=native)
	# compile with -O2 -flto and test
	$(call bench_run,-O2 -flto)
	# compile with -O2 -march=native -flto and test
	$(call bench_run,-O2 -march=native -flto)
	# compile with -O3 -march=native -flto and test; sometimes the
	# aggressive optimization may not improve performance.
	$(call bench_run,-O3 -march=native -flto)
# Remove everything the rules in this Makefile generate: datasets, test and
# benchmark binaries, generated sources, and the Python build products.
# Phony so that a stray file named `clean` cannot disable the target.
# The leading `-` keeps the clean-up best-effort: a failure on one line does
# not stop the remaining lines.
.PHONY: clean
clean:
	-$(RM) demo.h5 mnist.fake.h5 *.elf lucs-gtest test.leicht
	-$(RM) *_ut.cc test_benchmark.cc
	-$(RM) leicht.py _leicht.so leicht_wrap.cpp
	-$(RM) -r __pycache__/ build