
CUDA

A queue named gpu has been created, along with a PBS resource named ngpus.
Both need to be specified in the PBS script to send batch jobs to the gpu nodes.

We suggest you do an interactive batch run first to sort out any issues. To do that:

qsub -I -l ngpus=2 -l walltime=100:00:00 -q gpu -W group_list=nimrodusers

This will log you into one of the gpu nodes (you may have to wait if the nodes are busy).

Type this on the gpu node:

module load cuda/4.0.17

The cuda binaries are located here: /usr/local/cuda/bin/
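Once the module is loaded you can sanity-check the environment from the interactive shell. A minimal sketch (it assumes the cuda module has put the CUDA tools on your PATH, as shown in the module file further down this page):

which nvcc        # should resolve to /usr/local/cuda/bin/nvcc
nvcc --version    # prints the toolkit release
nvidia-smi        # lists the GPUs visible on the node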

NVIDIA drivers
==============
The NVIDIA driver package contains the Tesla C2070 card driver and support for OpenCL.

The CUDA toolkit contains
=========================
(Location: /usr/local/cuda/)
1. NVIDIA CUDA C/C++ Compiler (NVCC) ==>  nvcc
2. Visual Profiler, CUDA-GDB Debugger, CUDA-MEMCHECK
3. Developer Documentation
    Getting Started Guides, Release Notes, and more
    CUDA C Programming Guide, CUDA Best Practices Guide
    OpenCL Programming Guide, OpenCL Best Practices Guide

NVIDIA GPU Computing SDK
========================
The NVIDIA GPU Computing SDK contains hundreds of source code samples (location: /usr/local/cuda/NVIDIA_GPU_Computing_SDK/).

gowonda gpu nodes

Platform Name = NVIDIA CUDA, Platform Version = OpenCL 1.0 CUDA 4.0.1, SDK Revision = 7027912,
NumDevs = 2, Device = Tesla C2070, Device = Tesla C2070

Node names:

n020
n021
n022
n023

nvidia-smi -L

GPU 0: Tesla C2070 (S/N: 0320311069842)
GPU 1: Tesla C2070 (S/N: 0320311069438)
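For a fuller picture of GPU state on a node (memory in use, utilisation, temperature), nvidia-smi can be queried directly; the sections available depend on the driver version:

nvidia-smi                                          # one-screen summary of both cards
nvidia-smi -q -d MEMORY,UTILIZATION,TEMPERATURE     # detailed query, selected sections only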

module file

 module display cuda/4.0
-------------------------------------------------------------------
/sw/com/modulefiles/cuda/4.0:

module-whatis    Loads the CUDA gpu computing environment
conflict         cuda/2.3 cuda/3.0b cuda/3.0 cuda/3.1 cuda/3.2rc
append-path      PATH /usr/local/cuda/bin:/usr/local/cuda/computeprof/bin
append-path      LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/computeprof/bin
-------------------------------------------------------------------

usage

module load cuda/4.0.17

module file

#%Module######################################################################
##
##      cuda modulefile
##
proc ModulesHelp { } {
        puts stderr "Sets up the paths you need to use cuda"
}

module-whatis   Loads the CUDA gpu computing environment
conflict        cuda/2.3 cuda/3.0b cuda/3.0 cuda/3.1 cuda/3.2rc
set             CUDA_ROOT               /usr/local/cuda
set             CUDA_INSTALL_PATH       /usr/local/cuda
set             CUBLAS_INSTALL_PATH     /usr/local/cuda
###set          CUBLAS_LIBRARIES        -L/usr/local/cuda/lib64 -lcublas -lcudart
set             CUBLAS_INCLUDES         -I/usr/local/cuda/include
append-path     PATH                    /usr/local/cuda/bin:/usr/local/cuda/computeprof/bin
append-path     LD_LIBRARY_PATH         /usr/local/cuda/lib64:/usr/local/cuda/computeprof/bin
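A quick way to confirm the module actually updated your environment (assuming a bash shell):

module load cuda/4.0.17
echo $PATH | tr ':' '\n' | grep cuda
echo $LD_LIBRARY_PATH | tr ':' '\n' | grep cuda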


pbs run

use: -l ngpus=2

sample 1

#!/bin/bash

#PBS -N cuda
#PBS -l select=1:ncpus=1:mem=2gb:ngpus=1,walltime=01:00:00
#PBS -W group_list=gpu
#PBS -q gpuq2
cd $PBS_O_WORKDIR
source $HOME/.bashrc
module load anaconda3/2022.10
source activate TorchA100
echo "Hello from $HOSTNAME: date = `date`"
nvcc --version
echo "Finished at `date`"

sample 2

#!/bin/bash
#PBS -m abe
#PBS -M emailaddress@griffith.edu.au
#PBS -N CudaJob
#PBS -q gpu
#PBS -l select=1:ncpus=2:mem=2gb:ngpus=1
#PBS -W group_list=gpu
cd $PBS_O_WORKDIR
source $HOME/.bashrc
module load NAMD/NAMD28b1
module load  mpi/intel-4.0

echo "Starting job"
mpirun -r ssh  -n 2 namd2 +idlepoll /export/home/s2594054/pbs/namd/apoa1/namd/apoa1.namd > apoa1.namd.log
echo "Done with job"

Please change the directory names in the above script to reflect your own home directory.

qsub run.pbs
824.pbsserver
[s2594054@n027 namd]$ qstat
Job id            Name             User              Time Use S Queue
----------------  ---------------- ----------------  -------- - -----
812.pbsserver     3nss             s2795116          00:00:02 R workq
813.pbsserver     1ivf_naen        s2795116          00:00:01 R workq
818.pbsserver     1ivf_apo         s2795116          00:00:01 R workq
819.pbsserver     1nn2             s2795116          00:00:00 R workq
821.pbsserver     1ivg             s2795116          00:00:00 R workq
824.pbsserver     CudaJob          s2594054          00:00:00 R gpu
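To see which gpu node a running job landed on, ask PBS for the full job attributes; for example, using the job id from the listing above:

qstat -f 824.pbsserver | grep exec_host   # exec_host shows the node(s) assigned to the job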


Another sample PBS script (n060 node)

#!/bin/bash 
#PBS -m abe
#PBS -M yourEmail@griffith.edu.au 
#PBS -N testImage
#PBS -q dljun@n060
#PBS -W group_list=deeplearning -A deeplearning
#PBS -l select=1:ncpus=1:ngpus=1:mem=12gb,walltime=300:00:00
#PBS -j oe
cd  $PBS_O_WORKDIR
module load anaconda/5.3.1py3
source activate tensorflow-gpu
echo $CUDA_VISIBLE_DEVICES
GPUNUM=`echo $CUDA_VISIBLE_DEVICES`
sleep 2000
##echo "python main.py --cfg cfg/config3.yml --gpu $GPUNUM &"


Installation

We use NVIDIA Tesla C2050 GPUs.

CUDA-enabled Device Driver

A specific device driver has been installed to support CUDA:

http://www.nvidia.com/Download/index.aspx?lang=en-us

Product Type: Tesla
Product Series: C-Class
Product: Tesla C2050

http://www.nvidia.com/content/DriverDownload-March2009/confirmation.php?url=/XFree86/Linux-x86_64/285.05.09/NVIDIA-Linux-x86_64-285.05.09.run&lang=us&type=GeForce

Tesla C2050 drivers ==>

-----------------------
 /sbin/lspci | grep -i NVIDIA
03:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)
03:00.1 Audio device: nVidia Corporation GF100 High Definition Audio Controller (rev a1)
85:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)
85:00.1 Audio device: nVidia Corporation GF100 High Definition Audio Controller (rev a1)

-----------------------
/sbin/lspci | grep -i NVIDIA | grep "VGA compatible controller"
03:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)
85:00.0 VGA compatible controller: nVidia Corporation GF100 [Tesla C2050 / C2070] (rev a3)

-----------------------

 ls /dev/nvidia*
/dev/nvidia0  /dev/nvidia1  /dev/nvidiactl

-----------------------

 /sbin/modprobe -l|grep -i nvidia
kernel/drivers/video/backlight/mbp_nvidia_bl.ko
kernel/drivers/video/nvidia/nvidiafb.ko
kernel/drivers/video/nvidia.ko

-----------------------
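Another quick check that the proprietary driver is loaded, and which version it is (assuming the standard NVIDIA driver layout):

cat /proc/driver/nvidia/version   # version string of the loaded NVIDIA kernel module
lsmod | grep -i nvidia            # confirm the nvidia module is resident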


sh NVIDIA-Linux-x86_64-285.05.09.run --list|awk '{print $6}'

/32/
./32/libnvidia-glcore.so.285.05.09
./32/tls/
./32/tls/libnvidia-tls.so.285.05.09
./32/libOpenCL.so.1.0.0
./32/vdpau/
./32/libvdpau.so.285.05.09
./32/libvdpau_nvidia.so.285.05.09
./32/libGL.la
./32/libvdpau_trace.so.285.05.09
./32/libnvidia-tls.so.285.05.09
./32/libcuda.so.285.05.09
./32/libnvidia-ml.so.285.05.09
./32/libGL.so.285.05.09
./32/libnvidia-compiler.so.285.05.09
./libnvidia-glcore.so.285.05.09
./libnvcuvid.so.285.05.09
./libXvMCNVIDIA.so.285.05.09
./gl.h
./libglx.so.285.05.09
./tls/
./tls/libnvidia-tls.so.285.05.09
./NVIDIA_Changelog
./nvidia-debugdump
./makeself.sh
./libOpenCL.so.1.0.0
./libvdpau.so.285.05.09
./libvdpau_nvidia.so.285.05.09
./mkprecompiled
./pkg-history.txt
./LICENSE
./libGL.la
./nvidia-settings
./libvdpau_trace.so.285.05.09
./nvidia-settings.desktop
./README.txt
./nvidia_drv.so
./glx.h
./nvidia.icd
./nvidia-bug-report.sh
./nvidia-smi.1.gz
./libnvidia-cfg.so.285.05.09
---------------------------------

<snip>


---------------------------------
./kernel/rmil.h
./kernel/xapi-sdk.h
./kernel/os-smp.c
./kernel/nv-vm.c
./kernel/os-agp.c
./kernel/os-usermap.c
./kernel/nv-linux.h
./glxext.h
./libXvMCNVIDIA.a

Software

CUDA Toolkit

The CUDA Toolkit has all the development tools, libraries, and documentation you need to create applications for the CUDA architecture, including:

CUDA C/C++ compiler (nvcc)
GPU debugging and profiling tools (CUDA-GDB debugger, CUDA-MEMCHECK, Visual Profiler)
GPU-accelerated math libraries and performance primitives
(GPU-accelerated BLAS, FFT, sparse matrix, and RNG libraries; a minimal linking sketch follows the download link below)
Additional tools and documentation

http://developer.nvidia.com/cuda-toolkit
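The math libraries live under /usr/local/cuda/lib64 and are linked in the usual way with nvcc. A minimal sketch, where myprog.cu is a placeholder for your own source file that calls cuBLAS:

module load cuda/4.0.17
nvcc -o myprog myprog.cu -lcublas   # cudart is linked automatically by nvcc
./myprog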

sh cudatoolkit_4.0.17_linux_64_rhel6.0.run --list

sh cudatoolkit_4.0.17_linux_64_rhel6.0.run --list|awk '{print $6}'|sed 's/^./\/usr\/local\/cuda/g'

/usr/local/cuda/
/usr/local/cuda/install-linux.pl
/usr/local/cuda/doc/
/usr/local/cuda/doc/Thrust_Quick_Start_Guide.pdf
/usr/local/cuda/doc/CUSPARSE_Library.pdf
/usr/local/cuda/doc/cuda-memcheck.pdf
/usr/local/cuda/doc/CUBLAS_Library.pdf
/usr/local/cuda/doc/cuobjdump.pdf
/usr/local/cuda/doc/OpenCL_Implementation_Notes.txt
/usr/local/cuda/doc/OpenCL_Jumpstart_Guide.pdf
/usr/local/cuda/doc/CUDA_Toolkit_Reference_Manual.html
/usr/local/cuda/doc/OpenCL_Programming_Overview.pdf
/usr/local/cuda/doc/CUDA_C_Best_Practices_Guide.pdf
/usr/local/cuda/doc/OpenCL_Best_Practices_Guide.pdf
/usr/local/cuda/doc/ptx_isa_2.3.pdf
/usr/local/cuda/doc/CUDA_C_Programming_Guide.pdf
/usr/local/cuda/doc/ptx_isa_1.4.pdf
/usr/local/cuda/doc/CUDA_Toolkit_Reference_Manual.pdf
/usr/local/cuda/doc/Fermi_Tuning_Guide.pdf
/usr/local/cuda/doc/html/
---------------------------------

<snip>


---------------------------------

/usr/local/cuda/computeprof/projects/analysis_boxFilter_Context_0.csv
/usr/local/cuda/computeprof/projects/eigenvalues_eigenvalues_Context_0.csv
/usr/local/cuda/computeprof/projects/analysis_convolutionSeparable_Context_0.csv
/usr/local/cuda/computeprof/projects/MonteCarloMultiGPU_Session1_Context_2.csv
/usr/local/cuda/computeprof/Compute_Visual_Profiler_Release_Notes_Linux.txt
/usr/local/cuda/src/
/usr/local/cuda/src/fortran_thunking.c
/usr/local/cuda/src/icc_math.h.diff
/usr/local/cuda/src/fortran_thunking.h
/usr/local/cuda/src/fortran_common.h
/usr/local/cuda/src/fortran.c
/usr/local/cuda/src/cusparse_fortran.h
/usr/local/cuda/src/cusparse_fortran.c
/usr/local/cuda/src/fortran.h
/usr/local/cuda/src/cusparse_fortran_common.h

CUDA SDK - gpucomputingsdk

Installation Directory: /usr/local/cuda/NVIDIA_GPU_Computing_SDK

http://developer.nvidia.com/gpu-computing-sdk

The NVIDIA GPU Computing SDK provides hundreds of code samples and white papers to help you get started writing software with CUDA C/C++, OpenCL, or DirectCompute.

http://developer.download.nvidia.com/compute/cuda/4_0/sdk/gpucomputingsdk_4.0.17_linux.run

sh gpucomputingsdk_4.0.17_linux.run --list|awk '{print $6}'

./sdk/
./sdk/doc/
./sdk/doc/release/
./sdk/doc/release/CUDA_SDK_New_Features_Guide.pdf
./sdk/doc/release/Getting_Started_With_CUDA_SDK_Samples.pdf
./sdk/doc/release/License.pdf
./sdk/doc/GPU_COMPUTING_SDK_Description.rtf
./sdk/doc/CUDA_SDK_Release_Notes.txt
./sdk/doc/OpenCL_Release_Notes.txt
./sdk/shared/
./sdk/shared/inc/
./sdk/shared/inc/GL/
./sdk/shared/inc/GL/freeglut.h
./sdk/shared/inc/GL/freeglut_ext.h
./sdk/shared/inc/GL/freeglut_std.h
./sdk/shared/inc/GL/gl.h
./sdk/shared/inc/GL/glew.h
./sdk/shared/inc/GL/glext.h
./sdk/shared/inc/GL/glu.h
./sdk/shared/inc/GL/glut.h
./sdk/shared/inc/GL/glxew.h
./sdk/shared/inc/GL/glxext.h
./sdk/shared/inc/GL/wglew.h
./sdk/shared/inc/cmd_arg_reader.h
./sdk/shared/inc/dynlink/
./sdk/shared/inc/dynlink/channel_descriptor_dynlink.h
./sdk/shared/inc/dynlink/common_functions_dynlink.h
./sdk/shared/inc/dynlink/cuda_drvapi_dynlink.h
./sdk/shared/inc/dynlink/cuda_runtime_api_dynlink.h
./sdk/shared/inc/dynlink/cuda_runtime_dynlink.h
./sdk/shared/inc/dynlink/cuda_texture_types_dynlink.h
./sdk/shared/inc/dynlink/device_functions_dynlink.h
./sdk/shared/inc/dynlink/math_functions_dbl_ptx3_dynlink.h
./sdk/shared/inc/dynlink/math_functions_dynlink.h
./sdk/shared/inc/dynlink/texture_fetch_functions_dynlink.h
./sdk/shared/inc/exception.h
./sdk/shared/inc/multithreading.h
./sdk/shared/inc/nvGLWidgets.h
./sdk/shared/inc/nvGlutWidgets.h
./sdk/shared/inc/nvMath.h
./sdk/shared/inc/nvMatrix.h
./sdk/shared/inc/nvQuaternion.h
./sdk/shared/inc/nvShaderUtils.h
./sdk/shared/inc/nvVector.h
./sdk/shared/inc/nvWidgets.h
./sdk/shared/inc/rendercheckGL.h
./sdk/shared/inc/shrQATest.h
./sdk/shared/inc/shrUtils.h
./sdk/shared/inc/stopwatch.h
./sdk/shared/inc/stopwatch_base.h
./sdk/shared/inc/stopwatch_base.inl
./sdk/shared/inc/stopwatch_linux.h
./sdk/shared/lib/
---------------------------------
<snip>
---------------------------------
./sdk/OpenCL/src/oclVectorAdd/oclVectorAdd.cpp
./sdk/OpenCL/src/oclVolumeRender/
./sdk/OpenCL/src/oclVolumeRender/Makefile
./sdk/OpenCL/src/oclVolumeRender/data/
./sdk/OpenCL/src/oclVolumeRender/data/Bucky.raw
./sdk/OpenCL/src/oclVolumeRender/doc/
./sdk/OpenCL/src/oclVolumeRender/doc/sshot_lg.JPG
./sdk/OpenCL/src/oclVolumeRender/doc/sshot_md.jpg
./sdk/OpenCL/src/oclVolumeRender/doc/sshot_sm.JPG
./sdk/OpenCL/src/oclVolumeRender/oclVolumeRender.cpp
./sdk/OpenCL/src/oclVolumeRender/volumeRender.cl
./sdk/OpenCL/src/oclInlinePTX/
./sdk/OpenCL/src/oclInlinePTX/Makefile
./sdk/OpenCL/src/oclInlinePTX/inlinePTX.cl
./sdk/OpenCL/src/oclInlinePTX/oclInlinePTX.cpp
./sdk/OpenCL/releaseNotesData/
./sdk/OpenCL/releaseNotesData/GEF8_2D_wte.gif
./sdk/OpenCL/releaseNotesData/GEF9_2D_wte.gif
./sdk/OpenCL/releaseNotesData/GEFGTX200_2D_wte.gif
./sdk/OpenCL/releaseNotesData/NVSphere.ico
./sdk/OpenCL/releaseNotesData/QUA_FX_4600_White.gif
./sdk/OpenCL/releaseNotesData/link.jpg
./sdk/OpenCL/releaseNotesData/tesla.gif
./sdk/OpenCL/Makefile
./sdk/OpenCL/Samples.html
./sdk/Documentation.html
./sdk/Makefile
./sdk/License.txt
./sdk/cudpp_license.txt

Installation

 module load cuda/4.0.17;cd /tmp/tmp2;sh gpucomputingsdk_4.0.17_linux.run 

Enter install path (default ~/NVIDIA_GPU_Computing_SDK): /usr/local/cuda/NVIDIA_GPU_Computing_SDK

Located CUDA at /usr/local/cuda
If this is correct, choose the default below.
If it is not correct, enter the correct path to CUDA

Enter CUDA install path (default /usr/local/cuda):


-------------------------

<snip>

-------------------------

========================================

Configuring SDK Makefile (/usr/local/cuda/NVIDIA_GPU_Computing_SDK/C/common/common.mk)...

========================================

* Please make sure your PATH includes /usr/local/cuda/bin
* Please make sure your LD_LIBRARY_PATH includes /usr/local/cuda/lib

* To uninstall the NVIDIA GPU Computing SDK, please delete /usr/local/cuda/NVIDIA_GPU_Computing_SDK
* Installation Complete


Image Install
=============

mount --bind /proc/ /compute/proc/
mount --bind /dev /compute/dev
chroot /compute/
 module load cuda/4.0.17
cd /tmp;sh gpucomputingsdk_4.0.17_linux.run
exit
umount /compute/dev
umount /compute/proc

Matlab Plug-in for CUDA

Not installed at this time, but can be installed on request.

http://developer.nvidia.com/cuda-tools-ecosystem

N/A

CUDA Visual Profiler

manual: http://confluence.rcs.griffith.edu.au:8080/download/attachments/25428765/Compute_Visual_Profiler_User_Guide.pdf

This is installed with the CUDA toolkit.

module load cuda/4.0.17
computeprof &
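computeprof is a graphical tool, so you need an X-forwarded session through to the node where you run it. A rough sketch, with placeholder host and user names, assuming standard OpenSSH X forwarding:

# from your desktop, connect to the cluster with X forwarding
ssh -Y yourUsername@<login node>
# then, from an X-forwarded session on a gpu node:
module load cuda/4.0.17
computeprof &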


other compilation

Install the following packages before compiling:

freeglut-2.6.0-1.el6.x86_64.rpm libdrm-devel-2.4.23-1.el6.x86_64.rpm mesa-libGL-devel-7.10-1.el6.x86_64.rpm
freeglut-devel-2.6.0-1.el6.x86_64.rpm libXxf86vm-devel-1.1.0-1.el6.x86_64.rpm mesa-libGLU-devel-7.10-1.el6.x86_64.rpm

cd /usr/local/cuda/NVIDIA_GPU_Computing_SDK/C
 module load cuda/4.0.17
make
(or:  make 2>&1 |tee make.output.txt)
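Beyond the SDK samples, a quick way to confirm the toolchain end-to-end is to compile a trivial kernel of your own on a gpu node. A minimal sketch (hello.cu is just an example name; -arch=sm_20 matches the Fermi-class Tesla C2050/C2070 cards and is required for device-side printf):

module load cuda/4.0.17
cat > hello.cu <<'EOF'
#include <cstdio>
// trivial kernel: each thread prints its index
__global__ void hello_kernel()
{
    printf("Hello from GPU thread %d\n", threadIdx.x);
}
int main()
{
    hello_kernel<<<1, 4>>>();   // 1 block of 4 threads
    cudaDeviceSynchronize();    // wait for the kernel (and its printf) to finish
    return 0;
}
EOF
nvcc -arch=sm_20 -o hello hello.cu
./hello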

Run the sample codes.

 module load cuda/4.0.17
cd /usr/local/cuda/NVIDIA_GPU_Computing_SDK/C/bin/linux/release
./deviceQuery
./nbody

 ./deviceQuery
[deviceQuery] starting...
./deviceQuery Starting...

 CUDA Device Query (Runtime API) version (CUDART static linking)

Found 2 CUDA Capable device(s)

Device 0: "Tesla C2070"
  CUDA Driver Version / Runtime Version          4.0 / 4.0
  CUDA Capability Major/Minor version number:    2.0
  Total amount of global memory:                 5375 MBytes (5636554752 bytes)
  (14) Multiprocessors x (32) CUDA Cores/MP:     448 CUDA Cores
  GPU Clock Speed:                               1.15 GHz
  Memory Clock rate:                             1494.00 Mhz
  Memory Bus Width:                              384-bit
  L2 Cache Size:                                 786432 bytes
  Max Texture Dimension Size (x,y,z)             1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
  Max Layered Texture Size (dim) x layers        1D=(16384) x 2048, 2D=(16384,16384) x 2048
  Total amount of constant memory:               65536 bytes
  Total amount of shared memory per block:       49152 bytes
  Total number of registers available per block: 32768
  Warp size:                                     32
  Maximum number of threads per block:           1024
  Maximum sizes of each dimension of a block:    1024 x 1024 x 64
  Maximum sizes of each dimension of a grid:     65535 x 65535 x 65535
  Maximum memory pitch:                          2147483647 bytes
  Texture alignment:                             512 bytes
  Concurrent copy and execution:                 Yes with 2 copy engine(s)
  Run time limit on kernels:                     No
  Integrated GPU sharing Host Memory:            No
  Support host page-locked memory mapping:       Yes
  Concurrent kernel execution:                   Yes
  Alignment requirement for Surfaces:            Yes
  Device has ECC support enabled:                Yes
  Device is using TCC driver mode:               No
  Device supports Unified Addressing (UVA):      Yes
  Device PCI Bus ID / PCI location ID:           3 / 0
  Compute Mode:
     < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

Device 1: "Tesla C2070"
  CUDA Driver Version / Runtime Version          4.0 / 4.0
  CUDA Capability Major/Minor version number:    2.0
  Total amount of global memory:                 5375 MBytes (5636554752 bytes)
  (14) Multiprocessors x (32) CUDA Cores/MP:     448 CUDA Cores
  GPU Clock Speed:                               1.15 GHz
  Memory Clock rate:                             1494.00 Mhz
  Memory Bus Width:                              384-bit
  L2 Cache Size:                                 786432 bytes
  Max Texture Dimension Size (x,y,z)             1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
  Max Layered Texture Size (dim) x layers        1D=(16384) x 2048, 2D=(16384,16384) x 2048
  Total amount of constant memory:               65536 bytes
  Total amount of shared memory per block:       49152 bytes
  Total number of registers available per block: 32768
  Warp size:                                     32
  Maximum number of threads per block:           1024
  Maximum sizes of each dimension of a block:    1024 x 1024 x 64
  Maximum sizes of each dimension of a grid:     65535 x 65535 x 65535
  Maximum memory pitch:                          2147483647 bytes
  Texture alignment:                             512 bytes
  Concurrent copy and execution:                 Yes with 2 copy engine(s)
  Run time limit on kernels:                     No
  Integrated GPU sharing Host Memory:            No
  Support host page-locked memory mapping:       Yes
  Concurrent kernel execution:                   Yes
  Alignment requirement for Surfaces:            Yes
  Device has ECC support enabled:                Yes
  Device is using TCC driver mode:               No
  Device supports Unified Addressing (UVA):      Yes
  Device PCI Bus ID / PCI location ID:           133 / 0
  Compute Mode:
     < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.0, CUDA Runtime Version = 4.0, NumDevs = 2, Device = Tesla C2070, Device = Tesla C2070
[deviceQuery] test results...
PASSED

Press ENTER to exit...

Demo

fluidsGL
smokeParticles
particles
postProcessGL

Ref:

1. http://us.download.nvidia.com/XFree86/Linux-x86_64/275.09.07/NVIDIA-Linux-x86_64-275.09.07.run
2. http://www.nvidia.com/Download/index.aspx?lang=en-us
3. http://en.wikipedia.org/wiki/Nvidia_Tesla#Specifications_and_configurations
4. http://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications
5. http://code.google.com/p/stanford-cs193g-sp2010/wiki/TutorialWhenSomethingGoesWrong

notes

Install the following packages before compiling:

freeglut-2.6.0-1.el6.x86_64.rpm        libdrm-devel-2.4.23-1.el6.x86_64.rpm     mesa-libGL-devel-7.10-1.el6.x86_64.rpm
freeglut-devel-2.6.0-1.el6.x86_64.rpm  libXxf86vm-devel-1.1.0-1.el6.x86_64.rpm  mesa-libGLU-devel-7.10-1.el6.x86_64.rpm




notes2

ln -s /usr/lib64/libGLU.so.1.3.071000  /usr/lib64/libGLU.so
ln -s /usr/lib64/libglut.so.3 /usr/lib64/libglut.so
cp -r /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL  /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/src/oclVolumeRender


cp -r /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL  /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/src/


cp -r /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL  /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/common/inc/CL/

ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/src/oclMarchingCubes/GL



ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBandwidthTest/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBlackScholes/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclBoxFilter/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclConvolutionSeparable/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclCopyComputeOverlap/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDCT8x8/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDeviceQuery/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDotProduct/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclDXTCompression/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclFDTD3d/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclHiddenMarkovModel/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclHistogram/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclInlinePTX/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMarchingCubes/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMatrixMul/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMatVecMul/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMedianFilter/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclMersenneTwister/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclNbody/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclParticles/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclPostprocessGL/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclQuasirandomGenerator/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclRadixSort/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclRecursiveGaussian/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclReduction/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclScan/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSimpleGL/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSimpleMultiGPU/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSimpleTexture3D/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSobelFilter/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclSortingNetworks/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclTranspose/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclTridiagonal/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclVectorAdd/GL
ln -s /sw/cuda/CUDAToolsSDK/4.0.17/CUPTI/include/GL oclVolumeRender/GL


cd /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL
make
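Once the OpenCL samples build, they can be run from the release directory; a sketch (the bin/linux/release path follows the SDK's usual layout, so adjust it if your build puts binaries elsewhere):

cd /sw/cuda/NVIDIA_GPU_Computing_SDK/4.0.17/OpenCL/bin/linux/release
./oclDeviceQuery
./oclBandwidthTest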


cuDNN

cudnn-10.0
============
tar -zxvf cudnn-10.0-linux-x64-v7.6.5.32.tgz 
cuda/include/cudnn.h
cuda/NVIDIA_SLA_cuDNN_Support.txt
cuda/lib64/libcudnn.so
cuda/lib64/libcudnn.so.7
cuda/lib64/libcudnn.so.7.6.5
cuda/lib64/libcudnn_static.a

cp cuda/include/cudnn.h /usr/local/cuda-10.0/include/
cp cuda/lib64/libcudnn* /usr/local/cuda-10.0/lib64
chmod a+r /usr/local/cuda-10.0/include/cudnn.h /usr/local/cuda-10.0/lib64/libcudnn*


cudnn-10.1
==========

cd /tmp; tar -zxvf cudnn-10.1-linux-x64-v7.6.5.32.tgz
cp cuda/include/cudnn.h /usr/local/cuda-10.1/include/
cp cuda/lib64/libcudnn* /usr/local/cuda-10.1/lib64
 chmod a+r /usr/local/cuda-10.1/include/cudnn.h /usr/local/cuda-10.1/lib64/libcudnn*

cudnn-10.2
==========

tar -zxvf cudnn-10.2-linux-x64-v7.6.5.32.tgz 
cp cuda/include/cudnn.h /usr/local/cuda-10.2/include/
cp cuda/lib64/libcudnn* /usr/local/cuda-10.2/lib64
chmod a+r /usr/local/cuda-10.2/include/cudnn.h /usr/local/cuda-10.2/lib64/libcudnn*
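After copying the files in, you can confirm which cuDNN version a given CUDA installation carries by reading the version macros out of the header; a small sketch (for cuDNN 7.x the macros live in cudnn.h):

# prints CUDNN_MAJOR / CUDNN_MINOR / CUDNN_PATCHLEVEL
grep -A 2 'define CUDNN_MAJOR' /usr/local/cuda-10.2/include/cudnn.h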

Sample pbs script to run on n061 - gpuq2


#!/bin/bash
#PBS -m abe
#PBS -M emailaddress@griffith.edu.au
#PBS -N CudaJob
#PBS -q gpuq2
#PBS -l select=1:ncpus=1:mem=2gb:ngpus=1,walltime=01:00:00
cd $PBS_O_WORKDIR
source $HOME/.bashrc
module load anaconda3/2022.10
source activate TorchA100

echo "Starting job"
python isCuda
echo "Done with job"