TORQUE Maui hmli@ustc.edu.cn 2008 1 1 TORQUE 2 1.1 TORQUE........................... 2 1.2 TORQUE...................... 2 1.3 TORQUE.......................... 4 1.4 TORQUE........................... 4 2 Maui 5 2.1 Maui............................ 5 2.2 Maui............................ 5 3 6 3.1................................... 7 3.2................................... 8 3.3.............................. 8 3.3.1 qstat.................... 9 3.3.2 qhold........................... 10 3.3.3 qrls............................ 10 1
3.3.4 qdel canceljob..................... 10 3.3.5 checkjob....................... 11 3.3.6 qorder.................. 12 3.3.7 qselect............ 12 3.3.8 showq................... 13 3.3.9 pbsnodes qnodes................. 13 2
1 TORQUE TORQUE Maui http://www.clusterresources.com TORQUE http://www.clusterresources.com/torquedocs21/ Maui http://www.clusterresources.com/products/maui/docs/mauiusers.shtml 1.1 TORQUE kd50 node0101 root@kd50# tar zxvf torque-2.2.1.tar.gz root@kd50# cd torque-2.2.1 root@kd50# ./configure --prefix=/opt/torque-2.2.1 --with-rcp=rcp (--with-rcp=rcp rsh, --with-rcp=scp scp) rcp scp root@kd50# make root@kd50# make install 1.2 TORQUE TORQUE /etc/profile TORQUE=/opt/torque-2.2.1 MAUI=/opt/maui-3.2.6p20 if [ `id -u` -eq 0 ]; then PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin PATH=$PATH:$TORQUE/bin:$TORQUE/sbin:$MAUI/bin:$MAUI/sbin else PATH=/usr/local/bin:/usr/bin:/bin:/usr/games PATH=$PATH:$TORQUE/bin:$MAUI/bin fi Maui Maui Maui 3
source /etc/profile root TORQUE root@kd50# ./torque.setup root /var/spool/torque/server_priv/nodes kd50 node0101 node0101 node0101 np=2 /var/spool/torque spool undelivered drwxrwxrwt chmod 1777 spool undelivered root@kd50# pbs_server -t create root@kd50# qmgr Qmgr: dque Qmgr: create queue dque queue_type=execution Qmgr: set server default_queue=dque Qmgr: set queue dque started=true Qmgr: set queue dque enabled=true Qmgr: set server scheduling=true pbs_server pbs_server # shutdown server qterm -t quick # start server pbs_server # verify all queues are properly configured qstat -q # view additional server configuration qmgr -c 'p s' # verify all nodes are correctly reporting pbsnodes -a # submit a basic job echo "sleep 30" | qsub 4
# verify jobs display qstat 1.3 TORQUE TORQUE root@kd50# make packages torque-package-clients-linux-i686.sh torque-package-devel-linux-i686.sh torque-package-doc-linux-i686.sh torque-package-mom-linux-i686.sh torque-package-server-linux-i686.sh root@node0101# ./torque-package-clients-linux-i686.sh --install 1.4 TORQUE /var/spool/torque TORQUE server_name NFS TORQUE /var/spool/torque/mom_priv/config $pbsserver kd50 # note: hostname running pbs_server $logevent 255 # bitmap of which events to log $usecp kd50:/home /home $pbsserver $usecp home 5
/etc/profile TORQUE=/opt/torque-2.2.1 if [ `id -u` -eq 0 ]; then PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin PATH=$PATH:$TORQUE/bin:$TORQUE/sbin else PATH=/usr/local/bin:/usr/bin:/bin:/usr/games PATH=$PATH:$TORQUE/bin fi source /etc/profile pbs_mom 2 Maui TORQUE pbs_sched Maui Maui 2.1 Maui root@kd50# tar zxvf maui-3.2.6p20-snap.1182974819.tar.gz root@kd50# cd maui-3.2.6p20 root@kd50# ./configure --prefix=/opt/maui-3.2.6p20 --with-pbs=/opt/torque-2.2.1 root@kd50# make root@kd50# make install 2.2 Maui /usr/local/maui/maui.cfg SERVERHOST kd50 # primary admin must be first in list ADMIN1 root # Resource Manager Definition RMCFG[KD50] TYPE=PBS@RMNMHOST@ 6
RMTYPE[0] PBS /etc/profile TORQUE=/opt/torque-2.2.1 MAUI=/opt/maui-3.2.6p20 if [ `id -u` -eq 0 ]; then PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin PATH=$PATH:$TORQUE/bin:$TORQUE/sbin:$MAUI/bin:$MAUI/sbin else PATH=/usr/local/bin:/usr/bin:/bin:/usr/games PATH=$PATH:$TORQUE/bin:$MAUI/bin fi source /etc/profile Maui root@kd50# maui pbs_sched 3 TORQUE Maui qsub TORQUE Maui qsub TORQUE 7
3.1 ser_job.pbs #!/bin/sh #PBS -N job_name #PBS -o job.log #PBS -e job.err #PBS -q dque cd yourworkdir echo Running on hosts `hostname` echo Time is `date` echo Directory is $PWD echo This job runs on the following nodes: cat $PBS_NODEFILE echo This job has allocated 1 node ./yourprog 1 TORQUE PBS PBS #PBS qsub yourworkdir dque job_name job.log job.err #PBS -N -o -e -q job_name job.log job.err dque user@kd50: /work$ qsub ser_job.pbs 37.kd50 37.kd50 37 kd50 1 `hostname` 8
3.2 #!/bin/sh #PBS -N job_name #PBS -o job.log #PBS -e job.err #PBS -q dque #PBS -l nodes=4 cd yourworkdir echo Time is `date` echo Directory is $PWD echo This job runs on the following nodes: cat $PBS_NODEFILE NPROCS=`wc -l < $PBS_NODEFILE` echo This job has allocated $NPROCS nodes mpiexec -machinefile $PBS_NODEFILE -np $NPROCS ./yourprog #PBS -l nodes= mpiexec user@kd50: /work$ qsub par_job.pbs 3.3 TORQUE Maui canceljob checkjob nqs2pbs nqs pbs pbsnodes printjob qdel 9
qhold qmove qnodes pbsnodes qorder qrls qselect qstat qsub showbf showq showstart tracejob TORQUE Maui 3.3.1 qstat qstat user@kd50: /work$ qstat Job id Name User Time Use S Queue 48.kd50 job_name4 user 0 E dque 49.kd50 job_name1 user 00:00:00 R dque 50.kd50 job_name2 user 0 H dque 51.kd50 job_name3 user 0 Q dque E Q H R 10
3.3.2 qhold qhold qstat H 50.kd50 user@kd50: /work$ qhold 50.kd50 3.3.3 qrls qrls user@kd50: /work$ qrls 50.kd50 3.3.4 qdel canceljob qdel canceljob user@kd50: $ qdel 50.kd50 user@kd50: $ canceljob 51.kd50 11
3.3.5 checkjob checkjob user@kd50: $ checkjob 51.kd50 checking job 51 State: Hold Creds: user:user group:user class:dque qos:DEFAULT WallTime: 00:00:00 of 99:23:59:59 SubmitTime: Sun Dec 2 19:22:19 (Time Queued Total: 00:46:13 Eligible: 00:24:40) Total Tasks: 4 Req[0] TaskCount: 4 Partition: ALL Network: [NONE] Memory >= 0 Disk >= 0 Swap >= 0 Opsys: [NONE] Arch: [NONE] Features: [NONE] IWD: [NONE] Executable: [NONE] Bypass: 0 StartCount: 0 PartitionMask: [ALL] Flags: RESTARTABLE PE: 4.00 StartPriority: 24 cannot select job 51 for partition DEFAULT (non-idle state 'Hold') State: Hold user@kd50: $ checkjob 49.kd50 checking job 49 State: Running Creds: user:user group:user class:dque qos:DEFAULT WallTime: 1:07:14 of 99:23:59:59 SubmitTime: Sun Dec 2 19:02:10 (Time Queued Total: 00:00:01 Eligible: 00:00:01) StartTime: Sun Dec 2 19:02:11 Total Tasks: 4 Req[0] TaskCount: 4 Partition: DEFAULT Network: [NONE] Memory >= 0 Disk >= 0 Swap >= 0 Opsys: [NONE] Arch: [NONE] Features: [NONE] 12
NodeCount: 4 Allocated Nodes: [node04:1][node03:1][node02:1][node01:1] IWD: [NONE] Executable: [NONE] Bypass: 0 StartCount: 1 PartitionMask: [ALL] Flags: RESTARTABLE Reservation '49' (-1:06:52 -> 99:22:53:07 Duration: 99:23:59:59) PE: 4.00 StartPriority: 1 State: Running 3.3.6 qorder qorder user@kd50: $ qstat Job id Name User Time Use S Queue 52.kd50 job_name1 user 0 H dque 53.kd50 job_name2 user 0 Q dque 54.kd50 job_name3 user 0 Q dque user@kd50: $ qorder 53.kd50 54.kd50 user@kd50: $ qstat Job id Name User Time Use S Queue 52.kd50 job_name1 user 0 H dque 54.kd50 job_name3 user 0 Q dque 53.kd50 job_name2 user 0 Q dque qorder 53.kd50 54.kd50 53.kd50 54.kd50 54.kd50 53.kd50 3.3.7 qselect qselect 13
52.kd50 user@kd50: $ qselect -s H 3.3.8 showq user@kd50: $ showq ACTIVE JOBS JOBNAME USERNAME STATE PROC REMAINING STARTTIME 52 user Running 4 99:22:44:09 Sun Dec 2 21:04:37 1 Active Job 4 of 4 Processors Active (100.00%) IDLE JOBS JOBNAME USERNAME STATE PROC WCLIMIT QUEUETIME 54 user Idle 4 99:23:59:59 Sun Dec 2 21:04:45 1 Idle Job BLOCKED JOBS JOBNAME USERNAME STATE PROC WCLIMIT QUEUETIME 53 user Hold 4 99:23:59:59 Sun Dec 2 21:04:37 Total Jobs: 3 Active Jobs: 1 Idle Jobs: 1 Blocked Jobs: 1 3.3.9 pbsnodes qnodes pbsnodes qnodes free down offline user@kd50: $ pbsnodes -l free node0101 free node0102 free node0104 free 14