WeiNote

zasdfgbnm

2019

Sep 30

nsys profile

nsys.sh

#!/bin/bash
#
# nsys.sh - run a command under `nsys profile` with a fixed set of options.
#
# Usage:
#   ./nsys.sh COMMAND [ARG...]
#
# Environment:
#   NSYS_BIN  Path to the nsys executable (default: /nsight/bin/nsys).
#
# Collection is gated by --capture-range=cudaProfilerApi, so the profiled
# program is expected to call cudaProfilerStart/cudaProfilerStop itself
# (e.g. torch.cuda.profiler.start()/stop() in a PyTorch script).

main() {
  local nsys_bin="${NSYS_BIN:-/nsight/bin/nsys}"

  # Options are kept in an array so each one stays a single word and the
  # list can be edited without juggling line-continuation backslashes.
  local -a nsys_opts=(
    -t cuda,nvtx,osrt,cudnn,cublas
    -s cpu
    --capture-range=cudaProfilerApi
    --stop-on-range-end=true
    --cudabacktrace=true
    --cudabacktrace-threshold=10000
    --osrt-threshold=10000
    -x true
  )

  # "$@" must be quoted: unquoted, arguments containing spaces or glob
  # characters would be re-split/expanded before reaching nsys.
  "${nsys_bin}" profile "${nsys_opts[@]}" "$@"
}

main "$@"

test.py

"""Minimal CUDA workload for profiling: one elementwise add of two large tensors.

The region of interest is bracketed with torch.cuda.profiler.start()/stop()
so that a profiler configured to capture on the CUDA profiler API records
only that region.
"""
import torch

# Report which PyTorch build produced the profile.
print(torch.__version__)
print(torch.version.git_version)

# 512 Mi elements per tensor (uninitialized; contents are irrelevant here).
NUM_ELEMENTS = 512 * 1024 * 1024

torch.cuda.profiler.start()
a = torch.empty(NUM_ELEMENTS, device='cuda')
b = torch.empty(NUM_ELEMENTS, device='cuda')
a + b  # result intentionally discarded; only the kernel launch matters
# CUDA kernels are launched asynchronously: wait for the add to finish so
# its execution falls inside the captured range before the profiler stops.
torch.cuda.synchronize()
torch.cuda.profiler.stop()

Command to run (nsys.sh must be executable, e.g. `chmod +x nsys.sh`):

./nsys.sh python test.py