# lsf_utils.sh
#
# Helper aliases and bash functions for working with an LSF cluster
# (bsub / bjobs / bkill / btop): job-list bookkeeping, waiting for submitted
# jobs, job submission with per-job log files, and a few monitoring loops.
# Most functions are exported with `export -f` so that child bash processes
# can use them as well.

# Short-hands for common LSF invocations.
# bj:    wide-format job listing.
alias bj='bjobs -w'
# isub:  interactive bash session on the "interactive" queue.
alias isub='bsub -q interactive -Is bash'
# isub8: interactive bash session with several slots on a single host.
# NOTE(review): despite the name this requests 4 slots (-n 4), not 8 - confirm intent.
# The resource string is quoted inside the alias so that the shell cannot
# glob-expand "span[hosts=1]" against file names in the current directory.
alias isub8='bsub -n 4 -R "span[hosts=1]" -q interactive -Is bash'


# Append job ids read from stdin to a job-list file.
#
# Every stdin line is mirrored to stderr (so the submission message stays
# visible) and its second space-separated field, stripped of '<' and '>',
# is appended to the job list; e.g. a line "Job <123> is submitted ..."
# contributes "123".
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
joblist(){
    ## use default joblist-filename or user-provided name
    local joblistFile=".jobs"
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

    # add the job-id to the list (target is quoted so paths with spaces work)
    tee /dev/stderr | cut -f2 -d" " | sed 's/[<>]//g' >> "$joblistFile"
}
export -f joblist


# Kill every job listed in a job-list file (one job id per line).
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
jlistKill(){
    local joblistFile=".jobs" id
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

    # `|| [ -n "$id" ]` keeps a final line that lacks a trailing newline;
    # blank lines are skipped so bkill is never called with an empty id.
    while read -r id || [ -n "$id" ]; do
        [ -n "$id" ] || continue
        bkill "$id"
    done < "$joblistFile"
}
export -f jlistKill


# Kill all jobs whose `bjobs -w` line matches a pattern.
#
# The pattern is a grep regular expression and is matched against the whole
# listing line, not only the job name.
#
# Arguments: $1 - pattern (required, non-empty)
# Returns:   1 if no pattern was given
killByName(){
    # an empty pattern would match every job, so refuse it explicitly
    if [ -z "$1" ]; then
        echo "Usage: killByName <pattern>" >&2
        return 1
    fi

    echo "killing jobs which include: $1 ..."
    # -e keeps patterns that start with '-' from being parsed as grep options
    bjobs -w | grep -e "$1" | awk '{ print $1 }' | while read -r id ; do bkill "$id" ; done
}
export -f killByName


# Run `btop` once for every line of a job-list file, i.e. move each listed
# job to the top of the queue.
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
jlistBtop(){
    local joblistFile=".jobs"
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

    # -L1: one btop invocation per line of the list
    xargs -L1 btop < "$joblistFile"
}
export -f jlistBtop


# Print the `bjobs -w` lines of the jobs listed in a job-list file.
#
# A listing line is printed when its first column (the job id) equals one of
# the ids in the file. Matching on the id column avoids the false positives
# of substring matching (id 12 vs. 4123) and keeps a blank line in the list
# from selecting every job.
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
jlistStatus(){
    local joblistFile=".jobs"
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

    # first input (the list) fills ids[]; second input (stdin) is filtered by it
    bjobs -w | awk 'NR == FNR { if (NF) ids[$1]; next } ($1 in ids)' "$joblistFile" -
}
export -f jlistStatus


# Create a report for a job list by running CreateJobReport.R on it.
#
# If the `rendr_snippet` command/function is available, the R script is
# streamed into it and rendered as "<joblist>.report"; otherwise the script
# is downloaded and executed directly.
#
# Arguments: $1 - job-list name. Unless exactly one argument is given, the
#                 name is derived from the first "*.cluster_snapshots.txt"
#                 file (hidden files included) in the current directory.
# Returns:   1 if no job list was given and no snapshot file exists, or if
#            the fallback download fails.
#
# NOTE(review): the report script is fetched from GitHub and executed on
# every call, and the fallback disables certificate checking - consider
# pinning a local copy.
jlistReport(){
    local joblistFile="" snapshot
    local reportUrl="https://raw.githubusercontent.com/holgerbrandl/datautils/master/bash/CreateJobReport.R"

    if [ $# -eq 1 ]; then
        joblistFile=$1
    else
        # derive the job-list name from an existing snapshot file
        for snapshot in .[!.]*.cluster_snapshots.txt *.cluster_snapshots.txt; do
            [ -e "$snapshot" ] || continue   # unmatched glob stays literal
            joblistFile=${snapshot%.cluster_snapshots.txt}
            break
        done
        if [ -z "$joblistFile" ]; then
            echo "jlistReport: no *.cluster_snapshots.txt file found in $PWD" >&2
            return 1
        fi
    fi

#    ## add spin.R
#    export PATH=/projects/bioinfo/holger/bioinfo_templates/misc:$PATH
#    source $(which spin_utils.sh)

#    if [ -n "$(which rend.R)" ]; then
    if command -v rendr_snippet > /dev/null 2>&1; then
        curl "$reportUrl" 2>/dev/null | rendr_snippet "${joblistFile}.report" "$joblistFile"
    else
        ## fall back to plain execution of the report. This will just create a generic Rplots.pdf
        wget --no-check-certificate "$reportUrl" || return 1
        chmod u+x CreateJobReport.R
        ./CreateJobReport.R "$joblistFile"

        rm CreateJobReport.R
    fi
}
export -f jlistReport


# Block until none of the jobs in a job-list file shows up in `bjobs` any more.
#
# A job counts as pending while the first column of a `bjobs` output line
# equals one of the ids in the file. Matching on the id column means that a
# blank line in the list can no longer match every line (which made the wait
# endless) and that id 12 is not confused with id 4123.
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
wait4jobs(){
    ## use default joblist-filename or user-provided name
    local joblistFile=".jobs"
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

    # wait until all jobs from the list are done
    sleep 2
    while [ -n "$(bjobs 2>&1 | awk 'NR == FNR { if (NF) ids[$1]; next } ($1 in ids)' "$joblistFile" -)" ]; do
        sleep 15; ## or use bparams output
    done

    # the joblist-file is deliberately kept
#    rm $joblistFile
}
export -f wait4jobs


# Record the jobs arriving on stdin (via joblist), move them to the top of
# the queue (jlistBtop) and block until all of them are done (wait4jobs).
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
blockScript(){
    local joblistFile=".jobs"
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

    joblist "$joblistFile"
    echo "waiting now for joblist:
     $(cat "$joblistFile")"

    ## add jobs to top of queue
    jlistBtop "$joblistFile"

    wait4jobs "$joblistFile"
}
export -f blockScript



# Note: this should be the default. There's no point in NOT doing it.
# Block until all jobs of a job list are done, recording a `bjobs -W`
# snapshot of them every 30 seconds, then create the report via jlistReport.
#
# Snapshots are appended to "<joblist>.cluster_snapshots.txt": one
# tab-separated line per job (header line dropped), with the snapshot time
# (dd-mm-YYYY_HH:MM:SS) appended as last column.
#
# A job counts as pending while the first column of a `bjobs` output line
# equals one of the ids in the file, so blank lines in the list or ids that
# are substrings of other ids cannot prolong the wait.
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
# Globals:   curTime (written and exported; read by awk through ENVIRON)
wait4jobsReport(){
    ## use default joblist-filename or user-provided name
    local joblistFile=".jobs"
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

#    rm $joblistFile.cluster_usage.txt $joblistFile.cluster_snapshots.txt

    # wait until all jobs from the list are done
    sleep 2
    while [ -n "$(bjobs 2>&1 | awk 'NR == FNR { if (NF) ids[$1]; next } ($1 in ids)' "$joblistFile" -)" ]; do
        sleep 30; ## or use bparams output

        curTime=$(date +"%d-%m-%Y_%H:%M:%S")
        export curTime
        # shellcheck disable=SC2046 -- the ids are deliberately word-split into separate arguments
        bjobs -W $(cat "$joblistFile") 2>/dev/null | sed 's/ \+/\t/g' | tail -n +2  | awk -v OFS='\t' '{print $0, ENVIRON["curTime"]}'  >> "$joblistFile.cluster_snapshots.txt"
    done

#    bjobs -W $(cat $joblistFile )  >> $joblistFile.cluster_usage.txt
    jlistReport "$joblistFile"

    # the joblist-file is deliberately kept
#    rm $joblistFile
}
export -f wait4jobsReport


# Same flow as blockScript, but waits with wait4jobsReport: record the jobs
# arriving on stdin (joblist), move them to the top of the queue (jlistBtop)
# and block until all of them are done.
#
# Arguments: $1 - job-list file; ".jobs" is used unless exactly one argument is given
blockScriptReport(){
    local joblistFile=".jobs"
    if [ $# -eq 1 ]; then
        joblistFile=$1
    fi

    joblist "$joblistFile"
    echo "waiting now for joblist:
     $(cat "$joblistFile")"

    ## add jobs to top of queue
    jlistBtop "$joblistFile"

    wait4jobsReport "$joblistFile"
}
export -f blockScriptReport


# Send a short notification mail to the current user via sendmail.
#
# Arguments: $1 - subject text
#            $2 - optional extra text
# Globals:   MAILME_DOMAIN (read) - mail domain, defaults to mpi-cbg.de
#
# NOTE(review): $2 is appended to the Subject line (as before); no separate
# message body is produced - confirm that this is intended.
mailme(){
    local domain=${MAILME_DOMAIN:-mpi-cbg.de}

    # arguments are quoted so the text is neither word-split nor glob-expanded
    printf 'Subject:%s %s\n' "$1" "$2" | sendmail -v "$(whoami)@${domain}" > /dev/null
}
export -f mailme


# Print the `lsload` output, sorted from its first field on, once per second.
# Loops forever; stop it with Ctrl-C.
lsloop(){
    until false; do
        lsload | sort -k1
        sleep 1
    done
}


# Job monitor: every 5 seconds print a separator line, the first 50 lines of
# `bjobs -w` (each cut to 100 characters) and the first two lines of a count
# of the jobs per value of the third `bjobs` column.
# Loops forever; stop it with Ctrl-C.
bjloop(){
    while :
    do
        echo "----------------------------------------------------------------------------------------------------"
        ## http://theunixshell.blogspot.de/2012/12/print-first-80-characters-in-line.html
        bjobs -w | head -n 50 | cut -c1-100
        bjobs | awk '{print $3}' | sort | uniq -c | head -n2
        sleep 5
    done
}


# Cluster-wide summary: count the `bjobs -u all` lines per combination of
# their 2nd and 4th column (USER and QUEUE in the default bjobs layout).
bstatus(){
    bjobs -u all \
        | awk '{ printf "%s %s\n", $2, $4 }' \
        | sort \
        | uniq -c
}


# Once per second, print up to 50 `bjobs` lines that contain "short".
# Loops forever; stop it with Ctrl-C.
bjsloop(){
    for (( ; ; )); do
        bjobs | grep short | head -n 50
        sleep 1
    done
}

# Update the timestamps of a path and of everything below it.
#
# Arguments: $1 - file or directory to touch recursively
#                 (defaults to the current directory)
retouch(){
    # find hands the names straight to touch, so names containing
    # whitespace are no longer split as they were with `find | xargs`
    find "${1:-.}" -exec touch {} +
}
export -f retouch

# Periodically re-touch a directory tree: print a message, sleep 10000
# seconds, run `retouch` on the directory, and repeat forever.
# Presumably meant to protect the tree from age-based cleanup - confirm.
#
# Arguments: $1 - directory to keep alive
# Usage:     keepDirAlive /tmp/local_r_packages & disown
keepDirAlive(){
    local targetDir=$1
    while true; do
        echo "touching $targetDir"; sleep 10000
        retouch "$targetDir"
    done
}

## kill processes by partial name
## http://stackoverflow.com/questions/8987037/how-to-kill-all-processes-with-a-given-partial-name

# old output redirection scheme
#mysub(){
#    if [ $# -lt 2 ]; then echo "Usage: mysub <jobname> <script> [<additional bsub arguments>]"; return; fi
#
#    jobName=$1; shift
#    jobCmd=$1; shift
#
#    bsub -e $jobName.err.log -o $jobName.out.log -J $jobName $@ "$jobCmd"
#}

# Submit a command as a named LSF job and keep its logs under ./.logs.
#
# For job name N (spaces replaced by '_') the following files are written:
#   .logs/N.cmd      the command
#   .logs/N.lsfargs  the additional bsub arguments
#   .logs/N.jobid    the job id (filled by joblist when bsub is used)
#   .logs/N.err.log / .logs/N.out.log  stderr / stdout of the command
# If .logs/N.cmd already exists the job is not run again.
#
# Arguments: $1  - job name
#            $2  - command to run
#            $3+ - additional bsub arguments
# Globals:   DRY_RUN   (read) - if set, only append "name:<TAB><TAB>command"
#                               to this file and return
#            LOCAL_RUN (read) - if set, or if bsub is not available, the
#                               command is run with eval instead of bsub
# Returns:   1 on wrong usage
mysub(){
    if [ $# -lt 2 ]; then
        echo "Usage: mysub <jobname> <script> [<additional bsub arguments>]" >&2
        return 1
    fi

    local jobName=${1// /_}; shift
    local jobCmd=$1; shift

    # if dry run is defined just output submission call into $DRY_RUN
    # export DRY_RUN="dry_run.txt"
    if [ -n "$DRY_RUN" ]; then
        # printf, because bash's echo does not expand \t by default
        printf '%s:\t\t%s\n' "$jobName" "$jobCmd" >> "$DRY_RUN"
        return
    fi

    ## create hidden log file directory if not present
    mkdir -p .logs

    if [ -f ".logs/${jobName}.cmd" ]; then
        echo "could not run '$jobName' because log entry already exists" 1>&2; return;
    fi

    ## also log job command and queuing arguments for reference
    ## (printf instead of echo: arguments such as -n or -e must be logged, not interpreted)
    printf '%s\n' "$jobCmd" > ".logs/${jobName}.cmd"
    printf '%s\n' "$*" > ".logs/${jobName}.lsfargs"
    : > ".logs/${jobName}.jobid"   # reset the id file

    ## use bsub if available, otherwise fall back to simple eval and ignore other arguments
    if [ -n "$(command -v bsub)" ] && [ -z "$LOCAL_RUN" ]; then
#       echo "submitting job ${jobName}"
       bsub  -J "$jobName" "$@" "( $jobCmd ) 2>.logs/${jobName}.err.log 1>.logs/${jobName}.out.log" | joblist ".logs/${jobName}.jobid" 2>&1
    else
       echo "using eval instead of bsub for ${jobName}" >&2
       eval "$jobCmd" 2>".logs/${jobName}.err.log" 1>".logs/${jobName}.out.log"
    fi
}
export -f mysub
#mysub "test" "ls"
#mysub testjob "echo test; echo  blabla 1>&2;" -q medium


## really needed ??
#rm_emptylogs(){ find . -maxdepth 1 -name ".log" -type f -empty -print0 | xargs -0 echo rm -f ; }
#export -f rm_emptylogs


# Pack files into "<yymmdd>_<tarbasename>.tar.gz" and delete the originals.
#
# The originals are removed only if tar succeeded, so a failed or partial
# archive never costs data.
#
# Arguments: $1  - base name of the archive
#            $2+ - files to archive and remove
# Returns:   1 on wrong usage; otherwise the status of tar / rm
ziprm(){
    if [ $# -lt 2 ]; then
        echo "Usage: ziprm <tarbasename> [<file>]+" >&2
        return 1
    fi

    local tarName
    tarName=$(date +'%y%m%d')_"$1".tar.gz; shift

    tar czf "$tarName" -- "$@" && rm -- "$@"
}
export -f ziprm


## lock a node
#
# Submits a "node_locker" job (6 slots on a single host, queue "long") that
# appends "locked <hostname>" to ~/locked_worker.txt and then sleeps for 10
# hours. The job id is recorded in a fresh temporary job-list file whose path
# is printed to stderr, and a mail naming the last host recorded in
# ~/locked_worker.txt is sent.
#
# NOTE(review): the mail is sent right after submission; if the locker job
# has not started yet, the host of a previous lock is reported.
nlock(){
    # mktemp instead of a fixed, predictable file name in /tmp
    local tmpJoblistFile
    tmpJoblistFile=$(mktemp) || return 1

    # the resource string is quoted so the shell cannot glob-expand it
    bsub -J "node_locker" -R "span[hosts=1]" -n 6 -q long 'echo "locked $HOSTNAME" >> ~/locked_worker.txt; sleep 10h' | joblist "$tmpJoblistFile"
    echo "nlock: job id recorded in $tmpJoblistFile (release with: jlistKill $tmpJoblistFile)" >&2

    mailme "locked node: $(tail -n1 ~/locked_worker.txt | cut -d' ' -f2)"
#    ssx $(tail -n1 ~/locked_hosts.txt | cut -d' ' -f2)
#    jlistKill $tmpJoblistFile
}
export -f nlock

#isubNode(){
#    tmpJoblistFile=$(mktemp)
#    bsub -J "node_locker" -R span[hosts=1] -n 8 -q long 'echo "locked $HOSTNAME" >> ~/locked_hosts.txt; sleep 10h'  | wait4jobs $tmpJoblistFile
#    ssx $(tail -n1 ~/locked_hosts.txt | cut -d' ' -f2)
#    jlistKill $tmpJoblistFile
#}