Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

No Format
#!/bin/bash

#PBS -N cuda
#PBS -l walltime=100:00:00
#PBS -l select=1:ncpus=1:mem=15gb2gb:ngpus=1,walltime=01:00:00
#PBS -W group_list=gpu
#PBS -q gpu
 gpuq2
cd $PBS_O_WORKDIR
source $HOME/.bashrc
module load cudaanaconda3/4.0.172022.10
source activate TorchA100
echo "Hello from $HOSTNAME: date = `date`"
nvcc --version
echo "Finished at `date`"

sample 2

No Format
#!/bin/bash -l
#PBS -m abe
#PBS -M emailaddress@griffith.edu.au
#PBS -N CudaJob
#PBS -q gpu
#PBS -l select=21:ncpus=2:mem=2gb:ngpus=1
#PBS -W group_list=gpu
cd $PBS_O_WORKDIR
source $HOME/.bashrc
module load NAMD/NAMD28b1
module load  mpi/intel-4.0

echo "Starting job"
mpirun -r ssh  -n 2 namd2 +idlepoll /export/home/s2594054/pbs/namd/apoa1/namd/apoa1.namd > apoa1.namd.log
echo "Done with job"

...

No Format
qsub run.pbs
824.pbsserver
[s2594054@n027 namd]$ qstat
Job id            Name             User              Time Use S Queue
----------------  ---------------- ----------------  -------- - -----
812.pbsserver     3nss             s2795116          00:00:02 R workq
813.pbsserver     1ivf_naen        s2795116          00:00:01 R workq
818.pbsserver     1ivf_apo         s2795116          00:00:01 R workq
819.pbsserver     1nn2             s2795116          00:00:00 R workq
821.pbsserver     1ivg             s2795116          00:00:00 R workq
824.pbsserver     CudaJob          s2594054          00:00:00 R gpu

Installation

We use NVIDIA Tesla C2050 GPUs.

CUDA-enabled Device Driver

A specific device driver has been installed to support CUDA

http://www.nvidia.com/Download/index.aspx?lang=en-us

Product Type: tesla
Product Series: C-Class
Product: Tesla C2050

http://www.nvidia.com/content/DriverDownload-March2009/confirmation.php?url=/XFree86/Linux-x86_64/285.05.09/NVIDIA-Linux-x86_64-285.05.09.run&lang=us&type=GeForce

tesla c-2050 drivers ==>

...


Another sample PBS script (n060 node)

No Format
#!/bin/bash 
#PBS -m abe
#PBS -M yourEmail@griffith.edu.au 
#PBS -N testImage
#PBS -q dljun@n060
#PBS -W group_list=deeplearning -A deeplearning
#PBS -l select=1:ncpus=1:ngpus=1:mem=12gb,walltime=300:00:00
#PBS -j oe
cd  $PBS_O_WORKDIR
module load anaconda/5.3.1py3
source activate tensorflow-gpu
echo $CUDA_VISIBLE_DEVICES
GPUNUM=`echo $CUDA_VISIBLE_DEVICES`
sleep 2000
##echo "python main.py --cfg cfg/config3.yml --gpu $GPUNUM &"


Installation

We use NVIDIA Tesla C2050 GPUs.

CUDA-enabled Device Driver

A specific device driver has been installed to support CUDA

http://www.nvidia.com/Download/index.aspx?lang=en-us

Product Type: tesla
Product Series: C-Class
Product: Tesla C2050

http://www.nvidia.com/content/DriverDownload-March2009/confirmation.php?url=/XFree86/Linux-x86_64/285.05.09/NVIDIA-Linux-x86_64-285.05.09.run&lang=us&type=GeForce

tesla c-2050 drivers ==>

No Format
-----------------------


ls /devsbin/nvidia*
/dev/nvidia0  /dev/nvidia1  /dev/nvidiactl

-----------------------

 /sbin/modprobe -l|grep -i nvidia
kernel/drivers/video/backlight/mbp_nvidia_bl.ko
kernel/drivers/video/nvidia/nvidiafb.ko
kernel/drivers/video/nvidia.ko

------lspci | grep -i NVIDIA
03:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)
03:00.1 Audio device: nVidia Corporation GF100 High Definition Audio Controller (rev a1)
85:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)
85:00.1 Audio device: nVidia Corporation GF100 High Definition Audio Controller (rev a1)

-----------------------
/sbin/lspci | 
No Format
shgrep NVIDIA-Linux-x86_64-285.05.09.run --list|awk '{print $6}'

/32/
./32/libnvidia-glcore.so.285.05.09
./32/tls/
./32/tls/libnvidia-tls.so.285.05.09
./32/libOpenCL.so.1.0.0
./32/vdpau/
./32/libvdpau.so.285.05.09
./32/libvdpau_nvidia.so.285.05.09
./32/libGL.la
./32/libvdpau_trace.so.285.05.09
./32/libnvidia-tls.so.285.05.09
./32/libcuda.so.285.05.09
./32/libnvidia-ml.so.285.05.09
./32/libGL.so.285.05.09
./32/libnvidia-compiler.so.285.05.09
./libnvidia-glcore.so.285.05.09
./libnvcuvid.so.285.05.09
./libXvMCNVIDIA.so.285.05.09
./gl.h
./libglx.so.285.05.09
./tls/
./tls/libnvidia-tls.so.285.05.09
./NVIDIA_Changelog
./nvidia-debugdump
./makeself.sh
./libOpenCL.so.1.0.0
./libvdpau.so.285.05.09
./libvdpau_nvidia.so.285.05.09
./mkprecompiled
./pkg-history.txt
./LICENSE
./libGL.la
./nvidia-settings
./libvdpau_trace.so.285.05.09
./nvidia-settings.desktop
./README.txt
./nvidia_drv.so
./glx.h
./nvidia.icd
./nvidia-bug-report.sh
./nvidia-smi.1.gz
./libnvidia-cfg.so.285.05.09
-i NVIDIA | grep "VGA compatible controller"
03:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)
85:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)

-----------------------

 ls /dev/nvidia*
/dev/nvidia0  /dev/nvidia1  /dev/nvidiactl

-----------------------

 /sbin/modprobe -l|grep -i nvidia
kernel/drivers/video/backlight/mbp_nvidia_bl.ko
kernel/drivers/video/nvidia/nvidiafb.ko
kernel/drivers/video/nvidia.ko

-----------------------



No Format
sh NVIDIA-Linux-x86_64----------------

<snip>


---------------------------------
./kernel/rmil.h
./kernel/xapi-sdk.h
./kernel/os-smp.c
./kernel/nv-vm.c
./kernel/os-agp.c
./kernel/os-usermap.c
./kernel/nv-linux.h
./glxext.h
./libXvMCNVIDIA.a

Software

CUDA Toolkit

The CUDA Toolkit has all the development tools, libraries, and documentation you need to create applications for the CUDA architecture, including:

CUDA C/C++ Compiler
GPU Debugging & Profiling Tools CUDA-GDB debugger
GPU-Accelerated Math Libraries and Performance Primitives
(GPU-accelerated BLAS library,GPU-accelerated FFT library,GPU-accelerated Sparse Matrix library,GPU-accelerated RNG library)
C/C++ compiler
Visual Profiler
Additional tools and documentation

http://developer.nvidia.com/cuda-toolkit

sh cudatoolkit_4.0.17_linux_64_rhel6.0.run --list

No Format
sh cudatoolkit_4.0.17_linux_64_rhel6.0.run --list|awk '{print $6}'|sed 's/^./\/usr\/local\/cuda/g'

/usr/local/cuda/
/usr/local/cuda/install-linux.pl
/usr/local/cuda/doc/
/usr/local/cuda/doc/Thrust_Quick_Start_Guide.pdf
/usr/local/cuda/doc/CUSPARSE_Library.pdf
/usr/local/cuda/doc/cuda-memcheck.pdf
/usr/local/cuda/doc/CUBLAS_Library.pdf
/usr/local/cuda/doc/cuobjdump.pdf
/usr/local/cuda/doc/OpenCL_Implementation_Notes.txt
/usr/local/cuda/doc/OpenCL_Jumpstart_Guide.pdf
/usr/local/cuda/doc/CUDA_Toolkit_Reference_Manual.html
/usr/local/cuda/doc/OpenCL_Programming_Overview.pdf
/usr/local/cuda/doc/CUDA_C_Best_Practices_Guide.pdf
/usr/local/cuda/doc/OpenCL_Best_Practices_Guide.pdf
/usr/local/cuda/doc/ptx_isa_2.3.pdf
/usr/local/cuda/doc/CUDA_C_Programming_Guide.pdf
/usr/local/cuda/doc/ptx_isa_1.4.pdf
/usr/local/cuda/doc/CUDA_Toolkit_Reference_Manual.pdf
/usr/local/cuda/doc/Fermi_Tuning_Guide.pdf
/usr/local/cuda/doc/html/
---------------------------------

<snip>


---------------------------------

/usr/local/cuda/computeprof/projects/analysis_boxFilter_Context_0.csv
/usr/local/cuda/computeprof/projects/eigenvalues_eigenvalues_Context_0.csv
/usr/local/cuda/computeprof/projects/analysis_convolutionSeparable_Context_0.csv
/usr/local/cuda/computeprof/projects/MonteCarloMultiGPU_Session1_Context_2.csv
/usr/local/cuda/computeprof/Compute_Visual_Profiler_Release_Notes_Linux.txt
/usr/local/cuda/src/
/usr/local/cuda/src/fortran_thunking.c
/usr/local/cuda/src/icc_math.h.diff
/usr/local/cuda/src/fortran_thunking.h
/usr/local/cuda/src/fortran_common.h
/usr/local/cuda/src/fortran.c
/usr/local/cuda/src/cusparse_fortran.h
/usr/local/cuda/src/cusparse_fortran.c
/usr/local/cuda/src/fortran.h
/usr/local/cuda/src/cusparse_fortran_common.h

CUDA SDK - gpucomputingsdk

Installation Directory: /usr/local/cuda/NVIDIA_GPU_Computing_SDK

http://developer.nvidia.com/gpu-computing-sdk

The NVIDIA GPU Computing SDK provides hundreds of code samples and white papers to help you get started on the path of writing software with CUDA C/C++, OpenCL or DirectCompute.

http://developer.download.nvidia.com/compute/cuda/4_0/sdk/gpucomputingsdk_4.0.17_linux.run

No Format
sh gpucomputingsdk_4.0.17_linux.run --list|awk '{print $6}'

./sdk/
./sdk/doc/
./sdk/doc/release/
./sdk/doc/release/CUDA_SDK_New_Features_Guide.pdf
./sdk/doc/release/Getting_Started_With_CUDA_SDK_Samples.pdf
./sdk/doc/release/License.pdf
./sdk/doc/GPU_COMPUTING_SDK_Description.rtf
./sdk/doc/CUDA_SDK_Release_Notes.txt
./sdk/doc/OpenCL_Release_Notes.txt
./sdk/shared/
./sdk/shared/inc/
./sdk/shared/inc/GL/
./sdk/shared/inc/GL/freeglut.h
./sdk/shared/inc/GL/freeglut_ext.h
./sdk/shared/inc/GL/freeglut_std.h
./sdk/shared/inc/GL/gl.h
./sdk/shared/inc/GL/glew.h
./sdk/shared/inc/GL/glext.h
./sdk/shared/inc/GL/glu.h
./sdk/shared/inc/GL/glut.h
./sdk/shared/inc/GL/glxew.h
./sdk/shared/inc/GL/glxext.h
./sdk/shared/inc/GL/wglew.h
./sdk/shared/inc/cmd_arg_reader.h
./sdk/shared/inc/dynlink/
./sdk/shared/inc/dynlink/channel_descriptor_dynlink.h
./sdk/shared/inc/dynlink/common_functions_dynlink.h
./sdk/shared/inc/dynlink/cuda_drvapi_dynlink.h
./sdk/shared/inc/dynlink/cuda_runtime_api_dynlink.h
./sdk/shared/inc/dynlink/cuda_runtime_dynlink.h
./sdk/shared/inc/dynlink/cuda_texture_types_dynlink.h
./sdk/shared/inc/dynlink/device_functions_dynlink.h
./sdk/shared/inc/dynlink/math_functions_dbl_ptx3_dynlink.h
./sdk/shared/inc/dynlink/math_functions_dynlink285.05.09.run --list|awk '{print $6}'

/32/
./32/libnvidia-glcore.so.285.05.09
./32/tls/
./32/tls/libnvidia-tls.so.285.05.09
./32/libOpenCL.so.1.0.0
./32/vdpau/
./32/libvdpau.so.285.05.09
./32/libvdpau_nvidia.so.285.05.09
./32/libGL.la
./32/libvdpau_trace.so.285.05.09
./32/libnvidia-tls.so.285.05.09
./32/libcuda.so.285.05.09
./32/libnvidia-ml.so.285.05.09
./32/libGL.so.285.05.09
./32/libnvidia-compiler.so.285.05.09
./libnvidia-glcore.so.285.05.09
./libnvcuvid.so.285.05.09
./libXvMCNVIDIA.so.285.05.09
./gl.h
./libglx.so.285.05.09
./tls/
./tls/libnvidia-tls.so.285.05.09
./NVIDIA_Changelog
./nvidia-debugdump
./makeself.sh
./libOpenCL.so.1.0.0
./libvdpau.so.285.05.09
./libvdpau_nvidia.so.285.05.09
./mkprecompiled
./pkg-history.txt
./LICENSE
./libGL.la
./nvidia-settings
./libvdpau_trace.so.285.05.09
./nvidia-settings.desktop
./README.txt
./nvidia_drv.so
./glx.h
./nvidia.icd
./nvidia-bug-report.sh
./nvidia-smi.1.gz
./libnvidia-cfg.so.285.05.09
---------------------------------

<snip>


---------------------------------
./kernel/rmil.h
./kernel/xapi-sdk.h
./kernel/os-smp.c
./kernel/nv-vm.c
./kernel/os-agp.c
./kernel/os-usermap.c
./kernel/nv-linux.h
./glxext.h
./libXvMCNVIDIA.a

Software

CUDA Toolkit

The CUDA Toolkit has all the development tools, libraries, and documentation you need to create applications for the CUDA architecture, including:

CUDA C/C++ Compiler
GPU Debugging & Profiling Tools CUDA-GDB debugger
GPU-Accelerated Math Libraries and Performance Primitives
(GPU-accelerated BLAS library,GPU-accelerated FFT library,GPU-accelerated Sparse Matrix library,GPU-accelerated RNG library)
C/C++ compiler
Visual Profiler
Additional tools and documentation

http://developer.nvidia.com/cuda-toolkit

sh cudatoolkit_4.0.17_linux_64_rhel6.0.run --list

No Format
sh cudatoolkit_4.0.17_linux_64_rhel6.0.run --list|awk '{print $6}'|sed 's/^./\/usr\/local\/cuda/g'

/usr/local/cuda/
/usr/local/cuda/install-linux.pl
/usr/local/cuda/doc/
/usr/local/cuda/doc/Thrust_Quick_Start_Guide.pdf
/usr/local/cuda/doc/CUSPARSE_Library.pdf
/usr/local/cuda/doc/cuda-memcheck.pdf
/usr/local/cuda/doc/CUBLAS_Library.pdf
/usr/local/cuda/doc/cuobjdump.pdf
/usr/local/cuda/doc/OpenCL_Implementation_Notes.txt
/usr/local/cuda/doc/OpenCL_Jumpstart_Guide.pdf
/usr/local/cuda/doc/CUDA_Toolkit_Reference_Manual.html
/usr/local/cuda/doc/OpenCL_Programming_Overview.pdf
/usr/local/cuda/doc/CUDA_C_Best_Practices_Guide.pdf
/usr/local/cuda/doc/OpenCL_Best_Practices_Guide.pdf
/usr/local/cuda/doc/ptx_isa_2.3.pdf
/usr/local/cuda/doc/CUDA_C_Programming_Guide.pdf
/usr/local/cuda/doc/ptx_isa_1.4.pdf
/usr/local/cuda/doc/CUDA_Toolkit_Reference_Manual.pdf
/usr/local/cuda/doc/Fermi_Tuning_Guide.pdf
/usr/local/cuda/doc/html/
---------------------------------

<snip>


---------------------------------

/usr/local/cuda/computeprof/projects/analysis_boxFilter_Context_0.csv
/usr/local/cuda/computeprof/projects/eigenvalues_eigenvalues_Context_0.csv
/usr/local/cuda/computeprof/projects/analysis_convolutionSeparable_Context_0.csv
/usr/local/cuda/computeprof/projects/MonteCarloMultiGPU_Session1_Context_2.csv
/usr/local/cuda/computeprof/Compute_Visual_Profiler_Release_Notes_Linux.txt
/usr/local/cuda/src/
/usr/local/cuda/src/fortran_thunking.c
/usr/local/cuda/src/icc_math.h.diff
/usr/local/cuda/src/fortran_thunking.h
/usr/local/cuda/src/fortran_common.h
/usr/local/cuda/src/fortran.c
/usr/local/cuda/src/cusparse_fortran.h
/usr/local/cuda/src/cusparse_fortran.c
/usr/local/cuda/src/fortran.h
/usr/local/cuda/src/cusparse_fortran_common.h

CUDA SDK - gpucomputingsdk

Installation Directory: /usr/local/cuda/NVIDIA_GPU_Computing_SDK

http://developer.nvidia.com/gpu-computing-sdk

The NVIDIA GPU Computing SDK provides hundreds of code samples and white papers to help you get started on the path of writing software with CUDA C/C++, OpenCL or DirectCompute.

http://developer.download.nvidia.com/compute/cuda/4_0/sdk/gpucomputingsdk_4.0.17_linux.run

No Format
sh gpucomputingsdk_4.0.17_linux.run --list|awk '{print $6}'

./sdk/
./sdk/doc/
./sdk/doc/release/
./sdk/doc/release/CUDA_SDK_New_Features_Guide.pdf
./sdk/doc/release/Getting_Started_With_CUDA_SDK_Samples.pdf
./sdk/doc/release/License.pdf
./sdk/doc/GPU_COMPUTING_SDK_Description.rtf
./sdk/doc/CUDA_SDK_Release_Notes.txt
./sdk/doc/OpenCL_Release_Notes.txt
./sdk/shared/
./sdk/shared/inc/
./sdk/shared/inc/GL/
./sdk/shared/inc/GL/freeglut.h
./sdk/shared/inc/dynlinkGL/texture_fetch_functions_dynlinkfreeglut_ext.h
./sdk/shared/inc/GL/exceptionfreeglut_std.h
./sdk/shared/inc/GL/multithreadinggl.h
./sdk/shared/inc/GL/nvGLWidgetsglew.h
./sdk/shared/inc/nvGlutWidgetsGL/glext.h
./sdk/shared/inc/nvMathGL/glu.h
./sdk/shared/inc/GL/nvMatrixglut.h
./sdk/shared/inc/GL/nvQuaternionglxew.h
./sdk/shared/inc/nvShaderUtilsGL/glxext.h
./sdk/shared/inc/GL/nvVectorwglew.h
./sdk/shared/inc/nvWidgetscmd_arg_reader.h
./sdk/shared/inc/rendercheckGL.hdynlink/
./sdk/shared/inc/shrQATestdynlink/channel_descriptor_dynlink.h
./sdk/shared/inc/shrUtils./dynlink/common_functions_dynlink.h
./sdk/shared/inc/dynlink/cuda_drvapi_dynlink.h
./sdk/shared/inc/stopwatch/dynlink/cuda_runtime_api_dynlink.h
./sdk/shared/inc/stopwatch_basedynlink/cuda_runtime_dynlink.h
./sdk/shared/inc/stopwatch_base.inl/dynlink/cuda_texture_types_dynlink.h
./sdk/shared/inc/dynlink/stopwatchdevice_functions_linuxdynlink.h
./sdk/shared/inc/lib/
---------------------------------
<snip>
---------------------------------
./sdk/OpenCL/src/oclVectorAdd/oclVectorAdd.cppdynlink/math_functions_dbl_ptx3_dynlink.h
./sdk/shared/inc/dynlink/math_functions_dynlink.h
./sdk/OpenCLshared/srcinc/oclVolumeRender/dynlink/texture_fetch_functions_dynlink.h
./sdk/OpenCLshared/src/oclVolumeRender/Makefileinc/exception.h
./sdk/OpenCLshared/src/oclVolumeRender/data/inc/multithreading.h
./sdk/OpenCLshared/src/oclVolumeRender/data/Bucky.rawinc/nvGLWidgets.h
./sdk/OpenCLshared/src/oclVolumeRender/doc/inc/nvGlutWidgets.h
./sdk/OpenCLshared/src/oclVolumeRender/doc/sshot_lg.JPGinc/nvMath.h
./sdk/OpenCLshared/src/oclVolumeRender/doc/sshot_md.jpginc/nvMatrix.h
./sdk/OpenCLshared/src/oclVolumeRender/doc/sshot_sm.JPGinc/nvQuaternion.h
./sdk/OpenCLshared/srcinc/oclVolumeRender/oclVolumeRendernvShaderUtils.cpph
./sdk/OpenCLshared/srcinc/oclVolumeRender/volumeRendernvVector.clh
./sdk/OpenCLshared/src/oclInlinePTX/inc/nvWidgets.h
./sdk/OpenCLshared/src/oclInlinePTX/Makefileinc/rendercheckGL.h
./sdk/OpenCLshared/srcinc/oclInlinePTX/inlinePTXshrQATest.clh
./sdk/OpenCLshared/srcinc/oclInlinePTX/oclInlinePTXshrUtils.cpph
./sdk/OpenCLshared/releaseNotesDatainc/
./sdk/OpenCL/releaseNotesData/GEF8_2D_wte.gifstopwatch.h
./sdk/OpenCLshared/releaseNotesDatainc/GEF9stopwatch_2D_wtebase.gifh
./sdk/OpenCLshared/releaseNotesDatainc/GEFGTX200stopwatch_2D_wtebase.gifinl
./sdk/OpenCLshared/releaseNotesData/NVSphere.ico
./sdk/OpenCL/releaseNotesData/QUA_FX_4600_White.gifinc/stopwatch_linux.h
./sdk/OpenCLshared/releaseNotesDatalib/link.jpg
./sdk/OpenCL/releaseNotesData/tesla.gif
---------------------------------
<snip>
---------------------------------
./sdk/OpenCL/src/oclVectorAdd/oclVectorAdd.cpp
./sdk/OpenCL/src/MakefileoclVolumeRender/
./sdk/OpenCL/Samples.htmlsrc/oclVolumeRender/Makefile
./sdk/Documentation.html/OpenCL/src/oclVolumeRender/data/
./sdk/MakefileOpenCL/src/oclVolumeRender/data/Bucky.raw
./sdk/License.txt/OpenCL/src/oclVolumeRender/doc/
./sdk/cudpp_license.txt

Installation

No Format
 module load cuda/4.0.17;cd /tmp/tmp2;sh gpucomputingsdk_4.0.17_linux.run 

Enter install path (default ~/NVIDIA_GPU_Computing_SDK): /usr/local/cuda/NVIDIA_GPU_Computing_SDK

Located CUDA at /usr/local/cuda
If this is correct, choose the default below.
If it is not correct, enter the correct path to CUDA

Enter CUDA install path (default /usr/local/cuda):


-------------------------

<snip>

-------------------------

========================================

Configuring SDK Makefile (/usr/local/cuda/NVIDIA_GPU_Computing_SDK/C/common/common.mk)...

========================================

* Please make sure your PATH includes /usr/local/cuda/bin
* Please make sure your LD_LIBRARY_PATH includes /usr/local/cuda/lib

* To uninstall the NVIDIA GPU Computing SDK, please delete /usr/local/cuda/NVIDIA_GPU_Computing_SDK
* Installation Complete


Image Install
=============

mount --bind /proc/ /compute/proc/
mount --bind /dev /compute/dev
chroot /compute/
 module load cuda/4.0.17
cd /tmp;sh gpucomputingsdk_4.0.17_linux.run
exit
umount /compute/dev
umount /compute/proc

Matlab Plug-in for CUDA

Not installed at this time, but can be installed on request.

http://developer.nvidia.com/cuda-tools-ecosystem

No Format
N/A

CUDA Visual Profiler

manual: http://confluence.rcs.griffith.edu.au:8080/download/attachments/25428765/Compute_Visual_Profiler_User_Guide.pdf

This is installed with the CUDA toolkit.

module load cuda/4.0.17
computeprof &

http://developer.nvidia.com/cuda-tools-ecosystem

No Format
N/A

other compilation

Install the following packages before compiling:

freeglut-2.6.0-1.el6.x86_64.rpm libdrm-devel-2.4.23-1.el6.x86_64.rpm mesa-libGL-devel-7.10-1.el6.x86_64.rpm
freeglut-devel-2.6.0-1.el6.x86_64.rpm libXxf86vm-devel-1.1.0-1.el6.x86_64.rpm mesa-libGLU-devel-7.10-1.el6.x86_64.rpm

No Format
cd /usr/local/cuda/NVIDIA_GPU_Computing_SDK/C
 module load cuda/4.0.17
make
(or:  make 2>&1 |tee make.output.txt)

Run the sample codes.

 module load cuda/4.0.17
cd /usr/local/cuda/NVIDIA_GPU_Computing_SDK/C/bin/linux/release
./deviceQuery
./nbody

No Format
 ./deviceQuery
[deviceQuery] starting...
./deviceQuery Starting...

 CUDA Device Query (Runtime API) version (CUDART static linking)

Found 2 CUDA Capable device(s)

Device 0: "Tesla C2070"
  CUDA Driver Version / Runtime Version          /OpenCL/src/oclVolumeRender/doc/sshot_lg.JPG
./sdk/OpenCL/src/oclVolumeRender/doc/sshot_md.jpg
./sdk/OpenCL/src/oclVolumeRender/doc/sshot_sm.JPG
./sdk/OpenCL/src/oclVolumeRender/oclVolumeRender.cpp
./sdk/OpenCL/src/oclVolumeRender/volumeRender.cl
./sdk/OpenCL/src/oclInlinePTX/
./sdk/OpenCL/src/oclInlinePTX/Makefile
./sdk/OpenCL/src/oclInlinePTX/inlinePTX.cl
./sdk/OpenCL/src/oclInlinePTX/oclInlinePTX.cpp
./sdk/OpenCL/releaseNotesData/
./sdk/OpenCL/releaseNotesData/GEF8_2D_wte.gif
./sdk/OpenCL/releaseNotesData/GEF9_2D_wte.gif
./sdk/OpenCL/releaseNotesData/GEFGTX200_2D_wte.gif
./sdk/OpenCL/releaseNotesData/NVSphere.ico
./sdk/OpenCL/releaseNotesData/QUA_FX_4600_White.gif
./sdk/OpenCL/releaseNotesData/link.jpg
./sdk/OpenCL/releaseNotesData/tesla.gif
./sdk/OpenCL/Makefile
./sdk/OpenCL/Samples.html
./sdk/Documentation.html
./sdk/Makefile
./sdk/License.txt
./sdk/cudpp_license.txt

Installation

No Format
 module load cuda/4.0.17;cd /tmp/tmp2;sh gpucomputingsdk_4.0.17_linux.run 

Enter install path (default ~/NVIDIA_GPU_Computing_SDK): /usr/local/cuda/NVIDIA_GPU_Computing_SDK

Located CUDA at /usr/local/cuda
If this is correct, choose the default below.
If it is not correct, enter the correct path to CUDA

Enter CUDA install path (default /usr/local/cuda):


-------------------------

<snip>

-------------------------

========================================

Configuring SDK Makefile (/usr/local/cuda/NVIDIA_GPU_Computing_SDK/C/common/common.mk)...

========================================

* Please make sure your PATH includes /usr/local/cuda/bin
* Please make sure your LD_LIBRARY_PATH includes /usr/local/cuda/lib

* To uninstall the NVIDIA GPU Computing SDK, please delete /usr/local/cuda/NVIDIA_GPU_Computing_SDK
* Installation Complete


Image Install
=============

mount --bind /proc/ /compute/proc/
mount --bind /dev /compute/dev
chroot /compute/
 module load cuda/4.0.17
cd /tmp;sh gpucomputingsdk_4.0.17_linux.run
exit
umount /compute/dev
umount /compute/proc

Matlab Plug-in for CUDA

Not installed at this time, but can be installed on request.

http://developer.nvidia.com/cuda-tools-ecosystem

No Format
N/A

CUDA Visual Profiler

manual: http://confluence.rcs.griffith.edu.au:8080/download/attachments/25428765/Compute_Visual_Profiler_User_Guide.pdf

This is installed with the CUDA toolkit.

module load cuda/4.0.17
computeprof &

http://developer.nvidia.com/cuda-tools-ecosystem

No Format
N/A

other compilation

Install the following packages before compiling:

freeglut-2.6.0-1.el6.x86_64.rpm libdrm-devel-2.4.23-1.el6.x86_64.rpm mesa-libGL-devel-7.10-1.el6.x86_64.rpm
freeglut-devel-2.6.0-1.el6.x86_64.rpm libXxf86vm-devel-1.1.0-1.el6.x86_64.rpm mesa-libGLU-devel-7.10-1.el6.x86_64.rpm

No Format
cd /usr/local/cuda/NVIDIA_GPU_Computing_SDK/C
 module load cuda/4.0.17
make
(or:  make 2>&1 |tee make.output.txt)

Run the sample codes.

 module load cuda/4.0.17
cd /usr/local/cuda/NVIDIA_GPU_Computing_SDK/C/bin/linux/release
./deviceQuery
./nbody


No Format
 ./deviceQuery
[deviceQuery] starting...
./deviceQuery Starting...

 CUDA Device Query (Runtime API) version (CUDART static linking)

Found 2 CUDA Capable device(s)

Device 0: "Tesla C2070"
  CUDA Driver Version / Runtime Version          4.0 / 4.0
  CUDA Capability Major/Minor version number:    2.0
  Total amount of global memory:                 5375 MBytes (5636554752 bytes)
  (14) Multiprocessors x (32) CUDA Cores/MP:     448 CUDA Cores
  GPU Clock Speed:                               1.15 GHz
  Memory Clock rate:                             1494.00 Mhz
  Memory Bus Width:                              384-bit
  L2 Cache Size:                                 786432 bytes
  Max Texture Dimension Size (x,y,z)             1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
  Max Layered Texture Size (dim) x layers        1D=(16384) x 2048, 2D=(16384,16384) x 2048
  Total amount of constant memory:               65536 bytes
  Total amount of shared memory per block:       49152 bytes
  Total number of registers available per block: 32768
  Warp size:                                     32
  Maximum number of threads per block:           1024
  Maximum sizes of each dimension of a block:    1024 x 1024 x 64
  Maximum sizes of each dimension of a grid:     65535 x 65535 x 65535
  Maximum memory pitch:                          2147483647 bytes
  Texture alignment:                             512 bytes
  Concurrent copy and execution:                 Yes with 2 copy engine(s)
  Run time limit on kernels:                     No
  Integrated GPU sharing Host Memory:            No
  Support host page-locked memory mapping:       Yes
  Concurrent kernel execution:                   Yes
  Alignment requirement for Surfaces:            Yes
  Device has ECC support enabled:                Yes
  Device is using TCC driver mode:               No
  Device supports Unified Addressing (UVA):      Yes
  Device PCI Bus ID / PCI location ID:           3 / 0
  Compute Mode:
     < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

Device 1: "Tesla C2070"
  CUDA Driver Version / Runtime Version          4.0 / 4.0
  CUDA Capability Major/Minor version number:    2.0
  Total amount of global memory:                 5375 MBytes (5636554752 bytes)
  (14) Multiprocessors x (32) CUDA Cores/MP:     448 CUDA Cores
  GPU Clock Speed:                               1.15 GHz
  Memory Clock rate:                             1494.00 Mhz
  Memory Bus Width:                              384-bit
  L2 Cache Size:                                 786432 bytes
  Max Texture Dimension Size (x,y,z)             1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
  Max Layered Texture Size (dim) x layers        1D=(16384) x 2048, 2D=(16384,16384) x 2048
  Total amount of constant memory:               65536 bytes
  Total amount of shared memory per block:       49152 bytes
  Total number of registers available per block: 32768
  Warp size:                                     32
  Maximum number of threads per block:           1024
  Maximum sizes of each dimension of a block:    1024 x 1024 x 64
  Maximum sizes of each dimension of a grid:     65535 x 65535 x 65535
  Maximum memory pitch:                          2147483647 bytes
  Texture alignment:                             512 bytes
  Concurrent copy and execution:                 Yes with 2 copy engine(s)
  Run time limit on kernels:                     No
  Integrated GPU sharing Host Memory:            No
  Support host page-locked memory mapping:       Yes
  Concurrent kernel execution:                   Yes
  Alignment requirement for Surfaces:            Yes
  Device has ECC support enabled:                Yes
  Device is using TCC driver mode:               No
  Device supports Unified Addressing (UVA):      Yes
  Device PCI Bus ID / PCI location ID:           133 / 0
  Compute Mode:
     < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.0, CUDA Runtime Version = 4.0, NumDevs = 2, Device = Tesla C2070, Device = Tesla C2070
[deviceQuery] test results...
PASSED

Press ENTER to exit...

Demo

fluidsGL
smokeParticles
particles
postProcessGL

Ref:

1. http://us.download.nvidia.com/XFree86/Linux-x86_64/275.09.07/NVIDIA-Linux-x86_64-275.09.07.run
2. http://www.nvidia.com/Download/index.aspx?lang=en-us
3. http://en.wikipedia.org/wiki/Nvidia_Tesla#Specifications_and_configurations
4. http://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications
5. http://code.google.com/p/stanford-cs193g-sp2010/wiki/TutorialWhenSomethingGoesWrong

notes

No Format
Install the following packages before compiling:

freeglut-2.6.0-1.el6.x86_64.rpm       133 / 0 libdrm-devel-2.4.23-1.el6.x86_64.rpm    Compute Mode:
     < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.0, CUDA Runtime Version = 4.0, NumDevs = 2, Device = Tesla C2070, Device = Tesla C2070
[deviceQuery] test results...
PASSED

Press ENTER to exit...

Demo

fluidsGL
smokeParticles
particles
postProcessGL

Ref:

1. http://us.download.nvidia.com/XFree86/Linux-x86_64/275.09.07/NVIDIA-Linux-x86_64-275.09.07.run
2. http://www.nvidia.com/Download/index.aspx?lang=en-us
3. http://en.wikipedia.org/wiki/Nvidia_Tesla#Specifications_and_configurations
4. http://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications
5. http://code.google.com/p/stanford-cs193g-sp2010/wiki/TutorialWhenSomethingGoesWrong

notes

No Format
Install the following packages before compiling:

freeglut-2.6.0-1.el6.x86_64.rpm        libdrm-devel-2.4.23-1.el6.x86_64.rpm     mesa-libGL-devel-7.10-1.el6.x86_64.rpm
freeglut-devel-2.6.0-1.el6.x86_64.rpm  libXxf86vm-devel-1.1.0-1.el6.x86_64.rpm  mesa-libGLU-devel-7.10-1.el6.x86_64.rpm




notes2

No Format
ln -s /usr/lib64/libGLU.so.1.3.071000  /usr/lib64/libGLU.so
ln -s /usr/lib64/libglut.so.3 /usr/lib64/libglut.so
cp -r mesa-libGL-devel-7.10-1.el6.x86_64.rpm
freeglut-devel-2.6.0-1.el6.x86_64.rpm  libXxf86vm-devel-1.1.0-1.el6.x86_64.rpm  mesa-libGLU-devel-7.10-1.el6.x86_64.rpm




notes2

No Format
ln -s /usr/lib64/libGLU.so.1.3.071000  /usr/lib64/libGLU.so
ln -s /usr/lib64/libglut.so.3 /usr/lib64/libglut.so
cp -r /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL  /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/src/oclVolumeRender


cp -r /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL  /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/src/


cp -r /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL  /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/common/inc/CL/

ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/src/oclMarchingCubes/GL



ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBandwidthTest/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBlackScholes/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBoxFilter/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclConvolutionSeparable/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclCopyComputeOverlap/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDCT8x8/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDeviceQuery/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDotProduct/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDXTCompression/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclFDTD3d/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclHiddenMarkovModel/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclHistogram/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclInlinePTX/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMarchingCubes/GL
ln -s /sw/cuda/NVIDIA_GPU_Computing_SDKCUDAToolsSDK/4.0.17/OpenCLCUPTI/srcinclude/oclVolumeRenderGL oclMatrixMul/GL
ln cp -rs /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMatVecMul/GL
ln -s /sw/cuda/NVIDIA_GPU_Computing_SDKCUDAToolsSDK/4.0.17/OpenCLCUPTI/srcinclude/GL oclMedianFilter/GL
ln cp -rs /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMersenneTwister/GL
ln -s /sw/cuda/NVIDIA_GPU_Computing_SDKCUDAToolsSDK/4.0.17/OpenCLCUPTI/commoninclude/inc/CL/
GL oclNbody/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclParticles/sw/cuda/NVIDIA_GPU_Computing_SDKGL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/OpenCLCUPTI/srcinclude/oclMarchingCubesGL oclPostprocessGL/GL



ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBandwidthTestoclQuasirandomGenerator/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBlackScholesoclRadixSort/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBoxFilteroclRecursiveGaussian/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclConvolutionSeparableoclReduction/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclCopyComputeOverlapoclScan/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDCT8x8oclSimpleGL/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDeviceQueryoclSimpleMultiGPU/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDotProductoclSimpleTexture3D/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDXTCompressionoclSobelFilter/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclFDTD3doclSortingNetworks/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclHiddenMarkovModeloclTranspose/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclHistogramoclTridiagonal/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclInlinePTXoclVectorAdd/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMarchingCubesoclVolumeRender/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMatrixMul/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMatVecMul/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMedianFilter/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMersenneTwister/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclNbody/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclParticles/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclPostprocessGL/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclQuasirandomGenerator/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclRadixSort/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclRecursiveGaussian/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclReduction/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclScan/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSimpleGL/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSimpleMultiGPU/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSimpleTexture3D/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSobelFilter/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSortingNetworks/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclTranspose/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclTridiagonal/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclVectorAdd/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclVolumeRender/GL


cd /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL
make


cd /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL
make


cuDNN

No Format
cudnn-10.0
============
tar -zxvf cudnn-10.0-linux-x64-v7.6.5.32.tgz 
cuda/include/cudnn.h
cuda/NVIDIA_SLA_cuDNN_Support.txt
cuda/lib64/libcudnn.so
cuda/lib64/libcudnn.so.7
cuda/lib64/libcudnn.so.7.6.5
cuda/lib64/libcudnn_static.a

cp cuda/include/cudnn.h /usr/local/cuda-10.0/include/
cp cuda/lib64/libcudnn* /usr/local/cuda-10.0/lib64
chmod a+r /usr/local/cuda-10.0/include/cudnn.h /usr/local/cuda-10.0/lib64/libcudnn*


cudnn-10.1
==========

cd /tmp; tar -zxvf cudnn-10.1-linux-x64-v7.6.5.32.tgz
cp cuda/include/cudnn.h /usr/local/cuda-10.1/include/
cp cuda/lib64/libcudnn* /usr/local/cuda-10.1/lib64
 chmod a+r /usr/local/cuda-10.1/include/cudnn.h /usr/local/cuda-10.1/lib64/libcudnn*

cudnn-10.2
==========

tar -zxvf cudnn-10.2-linux-x64-v7.6.5.32.tgz 
cp cuda/include/cudnn.h /usr/local/cuda-10.2/include/
cp cuda/lib64/libcudnn* /usr/local/cuda-10.2/lib64
chmod a+r /usr/local/cuda-10.2/include/cudnn.h /usr/local/cuda-10.2/lib64/libcudnn*


Sample PBS script to run on n061 (gpuq2 queue)


No Format
#!/bin/bash
#PBS -m abe
#PBS -M emailaddress@griffith.edu.au
#PBS -N CudaJob
#PBS -q gpuq2
#PBS -l select=1:ncpus=1:mem=2gb:ngpus=1,walltime=01:00:00
cd $PBS_O_WORKDIR
source $HOME/.bashrc
module load anaconda3/2022.10
source activate TorchA100

echo "Starting job"
python isCuda
echo "Done with job"