
Commit a49dee9

Gpu estimation command (#48)
* gpu usage command integration
* changed name of memory output field
* changed comment
1 parent c3ff263 commit a49dee9

File tree

2 files changed: +121 -0 lines changed


deepview_profile/__main__.py

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,7 @@
 import deepview_profile.commands.interactive
 import deepview_profile.commands.memory
 import deepview_profile.commands.time
+import deepview_profile.commands.gpu_estimation


 def main():
@@ -23,6 +24,7 @@ def main():
     deepview_profile.commands.interactive.register_command(subparsers)
     deepview_profile.commands.memory.register_command(subparsers)
     deepview_profile.commands.time.register_command(subparsers)
+    deepview_profile.commands.gpu_estimation.register_command(subparsers)
     args = parser.parse_args()

     if args.version:
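
The hunk above wires the new module into the same argparse subcommand registry the existing commands use. Below is a minimal, self-contained sketch of that registration-and-dispatch pattern; the args.func(args) dispatch and the python -m deepview_profile invocation are assumptions for illustration, since only the register_command calls are visible in this diff.

import argparse

def register_command(subparsers):
    # Each deepview_profile command module follows this shape: add a subparser,
    # then bind its handler via set_defaults(func=...).
    parser = subparsers.add_parser("gpu-usage-estimation",
                                   help="Estimation of GPU operation time")
    parser.add_argument("path_to_file", help="path of the file you want to analyze")
    parser.set_defaults(func=lambda args: print("would profile", args.path_to_file))

def main():
    parser = argparse.ArgumentParser(prog="deepview_profile")
    subparsers = parser.add_subparsers(dest="command", required=True)
    register_command(subparsers)  # __main__.py calls one register_command per command module
    args = parser.parse_args()
    args.func(args)  # assumed dispatch: run the handler bound by the chosen subcommand

if __name__ == "__main__":
    # Example: python sketch.py gpu-usage-estimation path/to/train_script.py
    main()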
deepview_profile/commands/gpu_estimation.py

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
+import subprocess
+import sys
+import os
+import sqlite3
+from collections import defaultdict
+
+NS_TIME = 1e9  # nanoseconds per second
+
+def register_command(subparsers):
+    parser = subparsers.add_parser(
+        "gpu-usage-estimation",
+        help="Estimation of GPU operation time. We recommend running your training cycle for 100 iterations"
+    )
+
+    parser.add_argument(
+        "path_to_file",
+        help="path of the file you want to analyze"
+    )
+    parser.set_defaults(func=main)
+
+def joinIntervals(arr):
+    # arr = tuple(type, start, end, streamId)
+    eventDict = defaultdict(int)
+    filteredArr = []
+    prevRecord = list(arr[0])
+    for i in range(1, len(arr)):
+        newRecord = list(arr[i])
+        if prevRecord[1] <= newRecord[1] <= prevRecord[2]:  # overlapping: merge into prevRecord
+            prevRecord[1] = min(prevRecord[1], newRecord[1])
+            prevRecord[2] = max(prevRecord[2], newRecord[2])
+        else:
+            filteredArr.append(prevRecord)
+            prevRecord = newRecord
+    filteredArr.append(prevRecord)  # append the last record
+    for item in filteredArr:
+        eventDict[item[0]] += (item[2] - item[1])
+    return eventDict
+
+def sql_command_execution(db_path):
+    connection = sqlite3.connect(db_path, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
+    cursor = connection.cursor()
+
+    try:
+        timeline_data = cursor.execute("""
+            SELECT "memOps" as name, start, end, streamId
+            FROM CUPTI_ACTIVITY_KIND_MEMCPY
+            UNION ALL
+            SELECT "kernelOps" as name, start, end, streamId
+            FROM CUPTI_ACTIVITY_KIND_KERNEL
+            UNION ALL
+            SELECT "memOps" as name, start, end, streamId
+            FROM CUPTI_ACTIVITY_KIND_MEMSET
+            ORDER by start ASC;
+        """).fetchall()
+        profiling_duration = cursor.execute("""
+            SELECT duration FROM ANALYSIS_DETAILS;
+        """).fetchone()[0]
+        cupti_api_duration = cursor.execute("""
+            SELECT max(end)-min(start) from CUPTI_ACTIVITY_KIND_RUNTIME;
+        """).fetchone()[0]
+        cursor.close()
+    except sqlite3.Error as er:
+        print("There was an error reading the information from the sqlite database")
+        print('SQLite error: %s' % (' '.join(er.args)))
+        cursor.close()
+        sys.exit(1)
+
+    if not timeline_data:
+        print("There are no traces of GPU activity")
+        sys.exit()
+    gpu_activity_time = joinIntervals(timeline_data)
+    percgpu_activity = ((gpu_activity_time["kernelOps"] + gpu_activity_time["memOps"]) / cupti_api_duration) * 100
+    data = [round(profiling_duration / NS_TIME, 3),
+            round(cupti_api_duration / NS_TIME, 3),
+            round(gpu_activity_time["kernelOps"] / NS_TIME, 3),
+            round(gpu_activity_time["memOps"] / NS_TIME, 3),
+            round(percgpu_activity, 3)]
+
+    return data
+
+def remove_files(curr_dir):
+    nsysfile = os.path.join(curr_dir, "gpu_estimation.nsys-rep")
+    sqlitefile = os.path.join(curr_dir, "gpu_estimation.sqlite")
+    subprocess.run(["rm", nsysfile], capture_output=True, text=True)
+    subprocess.run(["rm", sqlitefile], capture_output=True, text=True)
+
+
+def actual_main(args):
+    result = subprocess.run(["which", "nsys"], capture_output=True, text=True)
+    if not result.stdout:
+        print("Please make sure the command nsys is included in your path")
+        print("You can try: export PATH=[path/to/bin]:$PATH")
+        print("You can verify using:", "\nwhich nsys", "\nnsys --version")
+        sys.exit(1)
+
+    curr_dir = subprocess.run(["pwd"], capture_output=True, text=True).stdout.strip()
+    nsys_output = subprocess.run(["nsys", "profile", "--trace=cuda,osrt", "--cpuctxsw=none", "--sample=none", "--force-overwrite=true", "--stats=true", "--output=gpu_estimation", "python", args.path_to_file],
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE,
+                                 text=True)
+    if nsys_output.stderr:
+        print("An error occurred during the analysis")
+        print("Please make sure that your training is executing on GPU")
+        print("Error:", nsys_output.stderr)
+        # remove generated files
+        remove_files(curr_dir)
+        sys.exit(1)
+
+    db_path = os.path.join(curr_dir, "gpu_estimation.sqlite")
+    summary = sql_command_execution(db_path)
+    headers = ["Estimate Profiling time", "CUDA API Time", "Kernel Ops Time", "Memory Ops time", "GPU Perc"]
+    format_row = "{:^25}" * len(headers)
+    print(format_row.format(*headers))
+    print(format_row.format(*summary))
+    # remove generated files
+    remove_files(curr_dir)
+
+def main(args):
+    actual_main(args)
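
For orientation, here is a hedged, toy-data illustration of the interval merging that joinIntervals performs before the per-type totals and the GPU-busy percentage are computed. The timestamps and the cupti_api_duration below are made up; real values come from the nsys-generated SQLite trace.

from collections import defaultdict

# Toy rows in the same (name, start, end, streamId) shape the SQL query returns,
# already ordered by start. The values are fabricated, not real CUPTI data.
timeline_data = [
    ("kernelOps", 0, 400, 7),
    ("kernelOps", 300, 700, 7),   # starts inside the previous interval, so the two merge
    ("memOps", 900, 1000, 7),
]

def join_intervals(rows):
    # Same merging idea as joinIntervals above: a row whose start falls inside
    # the previous interval extends it; otherwise the previous interval is kept.
    totals = defaultdict(int)
    prev = list(rows[0])
    merged = []
    for row in rows[1:]:
        cur = list(row)
        if prev[1] <= cur[1] <= prev[2]:
            prev[2] = max(prev[2], cur[2])
        else:
            merged.append(prev)
            prev = cur
    merged.append(prev)
    for name, start, end, _stream in merged:
        totals[name] += end - start
    return totals

totals = join_intervals(timeline_data)
cupti_api_duration = 1000  # assumed max(end) - min(start) of CUDA API activity, same toy units
busy_pct = (totals["kernelOps"] + totals["memOps"]) / cupti_api_duration * 100
print(dict(totals))   # {'kernelOps': 700, 'memOps': 100}
print(busy_pct)       # 80.0

Note that a merged interval's whole duration is credited to the type of its first row, so on real traces where kernel and memory operations overlap, the per-type split is an approximation, while the combined busy percentage still reflects the union of GPU activity.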

0 commit comments
