
Add soykb-generator and upgrade soykb-workflow-worker #1

Open · wants to merge 6 commits into base: master
48 changes: 26 additions & 22 deletions Dockerfile
@@ -1,22 +1,26 @@
FROM alpine:3.9
MAINTAINER Bartosz Balis <[email protected]>

ENV HYPERFLOW_JOB_EXECUTOR_VERSION=v1.0.11

RUN apk --update add openjdk7-jre \
&& apk add curl bash npm \
&& apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/v3.9/main/ nodejs=10.14.2-r0 \
&& apk add python3 libpcap libpcap-dev util-linux

RUN npm install -g https://github.com/hyperflow-wms/hyperflow-job-executor/archive/${HYPERFLOW_JOB_EXECUTOR_VERSION}.tar.gz

WORKDIR /soykb
COPY software/software.tar.gz .
RUN tar zxvf software.tar.gz
RUN chmod +x software/bwa-0.7.4/bwa
COPY software/*-wrapper ./
COPY software/libnethogs.so.0.8.5-63-g68033bf /usr/local/lib
COPY software/nethogs-wrapper.py /usr/local/bin
RUN chmod +x /usr/local/bin/nethogs-wrapper.py

ENV PATH="/soykb:${PATH}"
FROM archlinux
MAINTAINER Mateusz Plinta

ENV HYPERFLOW_JOB_EXECUTOR_VERSION=v1.0.13

RUN pacman -Sy
RUN pacman -S --needed --noconfirm git jre7-openjdk npm python3 libpcap util-linux base-devel libffi glibc lib32-glibc

RUN pacman -S --needed --noconfirm sudo
RUN useradd builduser -m
RUN passwd -d builduser
RUN printf 'builduser ALL=(ALL) ALL\n' | tee -a /etc/sudoers
RUN sudo -u builduser bash -c 'cd ~ && git clone https://aur.archlinux.org/ncurses5-compat-libs.git && cd ncurses5-compat-libs && makepkg -si --skippgpcheck --noconfirm'
RUN sudo -u builduser bash -c 'cd ~ && git clone https://aur.archlinux.org/libffi6.git && cd libffi6 && makepkg -si --noconfirm'

RUN npm install -g https://github.com/hyperflow-wms/hyperflow-job-executor/archive/${HYPERFLOW_JOB_EXECUTOR_VERSION}.tar.gz

WORKDIR /soykb
COPY software/software.tar.gz .
RUN tar zxvf software.tar.gz
COPY software/*-wrapper ./
COPY software/libnethogs.so.0.8.5-63-g68033bf /usr/local/lib
COPY software/nethogs-wrapper.py /usr/local/bin
RUN chmod +x /usr/local/bin/nethogs-wrapper.py

ENV PATH="/soykb:${PATH}"
24 changes: 24 additions & 0 deletions Dockerfile.alpine
@@ -0,0 +1,24 @@
# FROM alpine:3.11
FROM frolvlad/alpine-glibc
MAINTAINER Bartosz Balis <[email protected]>

ENV HYPERFLOW_JOB_EXECUTOR_VERSION=v1.0.11

RUN apk --update add openjdk7-jre \
&& apk add curl bash ncurses ncurses5 ncurses5-libs npm \
# && apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/v3.11/main/ nodejs=10.14.2-r0 \
&& apk add python3 libpcap libpcap-dev util-linux

RUN npm install -g https://github.com/hyperflow-wms/hyperflow-job-executor/archive/${HYPERFLOW_JOB_EXECUTOR_VERSION}.tar.gz

RUN ln -s /usr/lib/libncurses.so.5 /usr/lib/libtinfo.so.5

WORKDIR /soykb
COPY software/software.tar.gz .
RUN tar zxvf software.tar.gz
COPY software/*-wrapper ./
COPY software/libnethogs.so.0.8.5-63-g68033bf /usr/local/lib
COPY software/nethogs-wrapper.py /usr/local/bin
RUN chmod +x /usr/local/bin/nethogs-wrapper.py

ENV PATH="/soykb:${PATH}"
9 changes: 6 additions & 3 deletions Makefile
@@ -1,13 +1,16 @@
TAG = $(shell git describe --tags --always)
PREFIX = hyperflowwms
REPO_NAME = soykb-workflow-worker
# PREFIX = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 3 | rev)
# REPO_NAME = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 2 | rev)

REPO_NAME = 'soykb-worker'
PREFIX = 'hyperflowwms'

all: push

container: image

image:
docker build -t $(PREFIX)/$(REPO_NAME) . # Build new image and automatically tag it as latest
docker build --no-cache -t $(PREFIX)/$(REPO_NAME) . # Build new image and automatically tag it as latest
docker tag $(PREFIX)/$(REPO_NAME) $(PREFIX)/$(REPO_NAME):$(TAG) # Add the version tag to the latest image

push: image
10 changes: 10 additions & 0 deletions README.md
@@ -1,6 +1,16 @@
# Soykb workflow for HyperFlow

[![](https://images.microbadger.com/badges/version/hyperflowwms/soykb-workflow-worker.svg)](https://microbadger.com/images/hyperflowwms/soykb-workflow-worker "Get your own version badge on microbadger.com")
## Generate example workflows

Generate an example workflow:
- `genwf-size2.sh` (size 2)

The scripts invoke Docker images and create:
- a `data` subdirectory containing the workflow file `workflow.json` and `haplotype-files.list`

You can also use the `hyperflowwms/soykb-generator` image directly to generate other workflows; see the scripts for command examples. For example, to generate smaller workflows, pass a smaller value for the fastq-files parameter (see the sketch below).
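A minimal sketch, assuming the generator takes the fastq-file count as its single argument (mirroring `genwf-size2.sh`; the value `4` is hypothetical):

```sh
# Generate a size-4 workflow into the current directory.
docker run -v "$PWD":/workdir hyperflowwms/soykb-generator \
    sh -c 'generate-workflow 4'
```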


## Build and publish image
The HyperFlow Docker image contains the Soykb binaries and the HyperFlow job executor
3 changes: 3 additions & 0 deletions genwf-size2.sh
@@ -0,0 +1,3 @@
#!/bin/sh

docker run -v $PWD:/workdir hyperflowwms/soykb-generator sh -c 'generate-workflow 2'
8 changes: 8 additions & 0 deletions run.sh
@@ -0,0 +1,8 @@
#!/bin/bash
# This script runs everything in containers, so that you only need Docker on your host machine

echo Before running this script, start Redis container as follows:
echo docker run -d --name redis redis --bind 127.0.0.1
echo

docker run -a stdout -a stderr --rm --network container:redis \
    -e HF_VAR_WORKER_CONTAINER="hyperflowwms/soykb-workflow-worker" \
    -e HF_VAR_WORK_DIR="$PWD/data" \
    -e HF_VAR_HFLOW_IN_CONTAINER="true" \
    -e HF_VAR_function="redisCommand" \
    -e REDIS_URL="redis://127.0.0.1:6379" \
    --name hyperflow \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v "$PWD":/wfdir \
    --entrypoint "/bin/sh" \
    hyperflowwms/hyperflow:v1.3.23 -c "apk add docker && hflow run /wfdir"
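Taken together with the script's own hints, a typical session is:

# Start Redis first (exactly the command the script echoes), then run the workflow:
docker run -d --name redis redis --bind 127.0.0.1
./run.sh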
2 changes: 2 additions & 0 deletions software/bwa-wrapper
@@ -2,5 +2,7 @@

set -e

export TMPDIR=`pwd`

/soykb/software/bwa-0.7.4/bwa "$@"

18 changes: 10 additions & 8 deletions software/gatk-wrapper
@@ -4,14 +4,16 @@ export TMPDIR=`pwd`

OUTFILE=`mktemp -t gatk-output.XXXXXXXXXX` || exit 1

# memory depends on what subsystem we call
OPTIONS="-Xmx2g -XX:+UseSerialGC"
#if (echo "'$@'" | grep -i "HaplotypeCaller") >/dev/null; then
# OPTIONS="-Xmx2g -XX:+UseSerialGC"
#elif (echo "'$@'" | grep -i "CombineGVCFs") >/dev/null; then
# OPTIONS="-Xmx17g -XX:+UseSerialGC"
#fi
OPTIONS="$OPTIONS -Djava.io.tmpdir=$TMPDIR"
# first argument is the memory limit in GB, the rest are GATK args
MEM_TOTAL=$1
shift

# Java mx should be a little bit lower than requested memory
MEM_JAVA_MX=$(($MEM_TOTAL - 2))

OPTIONS="-Xmx${MEM_JAVA_MX}g -XX:+UseSerialGC"

OPTIONS="-Djava.io.tmpdir=$TMPDIR $OPTIONS"

java $OPTIONS \
-jar /soykb/software/GenomeAnalysisTK-3.0.0/GenomeAnalysisTK.jar \
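The final `java` invocation is truncated above, but the new calling convention is clear from the preceding lines: the first argument is the job's total memory in GB and the remainder is passed straight to GATK. A hypothetical invocation (tool and file names are placeholders):

# 8 GB requested for the job, so Java gets -Xmx6g (MEM_TOTAL - 2).
gatk-wrapper 8 -T HaplotypeCaller -R reference.fa -I sample.bam -o sample.g.vcf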
7 changes: 7 additions & 0 deletions software/gunzip-wrapper
@@ -0,0 +1,7 @@
#!/bin/bash

set -e

gunzip -c $1 > $2


3 changes: 2 additions & 1 deletion software/picard-wrapper
@@ -1,6 +1,7 @@
#!/bin/bash

set -e
export TMPDIR=`pwd`

java -Xmx2g -XX:+UseSerialGC -jar /soykb/software/picard-tools-1.92/"$@"
java -Djava.io.tmpdir=$TMPDIR -Xmx15g -XX:+UseSerialGC -jar /soykb/software/picard-tools-1.92/"$@"
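Because `"$@"` is appended to the Picard directory path, the first argument names the tool jar and the remaining arguments go to that tool. A hypothetical invocation (file names are placeholders):

picard-wrapper SortSam.jar INPUT=sample.bam OUTPUT=sample.sorted.bam SORT_ORDER=coordinate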

15 changes: 15 additions & 0 deletions software/samtools-wrapper
@@ -0,0 +1,15 @@
#!/bin/bash

set -e

export TMPDIR=`pwd`

# sometimes we are asked to "merge" only one file
if [ "X$1" = "Xmerge" -a "X$4" = "X" ]; then
# just copy
cp "$3" "$2"
exit 0
fi

/soykb/software/samtools-1.0/samtools "$@"
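The guard above turns a single-input `merge` into a plain copy ($2 is the output, $3 the lone input); anything else falls through to the bundled samtools. Hypothetical invocations:

samtools-wrapper merge out.bam only.bam          # one input: simply copies only.bam to out.bam
samtools-wrapper merge out.bam in1.bam in2.bam   # two inputs: runs the real samtools merge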

13 changes: 13 additions & 0 deletions software/software-wrapper
@@ -0,0 +1,13 @@
#!/bin/bash

set -e

if [ ! -d /soykb/software ]; then
tar xzf /soykb/software.tar.gz
fi

# fix for leftover files in the home directory at TACC
find ~/ -maxdepth 1 -name slurm.\* -mtime +5 -exec rm -f {} \; || /bin/true
find ~/ -maxdepth 1 -name gram\*.log -mtime +5 -exec rm -f {} \; || /bin/true


Binary file modified software/software.tar.gz
Binary file not shown.
3 changes: 3 additions & 0 deletions workflow-generator/.gitignore
@@ -0,0 +1,3 @@
*.pyc
software
software.tar.gz
15 changes: 15 additions & 0 deletions workflow-generator/Dockerfile
@@ -0,0 +1,15 @@
FROM mhart/alpine-node:12

LABEL maintainer "Mateusz Plinta <[email protected]>"

RUN apk add python-dev

RUN mkdir /soykb-workflow

ADD . /soykb-workflow/

RUN npm install https://github.com/hyperflow-wms/pegasus-hyperflow-converter/archive/master.tar.gz /

ENV PATH /soykb-workflow:/node_modules/.bin:$PATH
ENV PYTHONPATH=/soykb-workflow
WORKDIR /soykb-workflow
20 changes: 20 additions & 0 deletions workflow-generator/Makefile
@@ -0,0 +1,20 @@
TAG = $(shell git describe --tags --always)
# PREFIX = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 3 | rev)
# REPO_NAME = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 2 | rev)

REPO_NAME = 'soykb-generator'
PREFIX = 'hyperflowwms'

all: push

container: image

image:
docker build -t $(PREFIX)/$(REPO_NAME) . # Build new image and automatically tag it as latest
docker tag $(PREFIX)/$(REPO_NAME) $(PREFIX)/$(REPO_NAME):$(TAG) # Add the version tag to the latest image

push: image
docker push $(PREFIX)/$(REPO_NAME) # Push image tagged as latest to repository
docker push $(PREFIX)/$(REPO_NAME):$(TAG) # Push version tagged image to repository (since this image is already pushed it will simply create or update version tag)

clean:
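Typical usage of these targets, inferred from the recipes above:

make image   # build hyperflowwms/soykb-generator, tagged latest plus $(git describe --tags --always)
make push    # rebuild, then push both the latest and version tags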
93 changes: 93 additions & 0 deletions workflow-generator/Pegasus/AutoADAG.py
@@ -0,0 +1,93 @@
# #
# Copyright 2007-2012 University Of Southern California
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# #

__author__ = 'Rajiv Mayani'

import logging

try:
from Pegasus.DAX3 import ADAG, Job, File, Executable, PFN, Link, When, DuplicateError
except ImportError, e:
logging.error('Include Pegasus Python libraries in your PYTHONPATH')


class AutoADAG(object, ADAG):
"""
Automatically determine the dependencies between jobs based on the file usages.
All jobs consuming a file F depend on the singular job that produces that file.
"""
def __init__(self, name, count=None, index=None):
ADAG.__init__(self, name, count, index)

def writeXML(self, out):

mapping = {}

def addOutput(job, file_obj):

if file_obj:
file_obj = file_obj.name

if file_obj not in mapping:
mapping[file_obj] = (set(), set())

mapping[file_obj][1].add(job)

# Automatically determine dependencies

# Traverse each job
for job_id, job in self.jobs.iteritems():
file_used = job.used

# If job produces to stdout, identify it as an output file
addOutput(job, job.stdout)
# If job produces to stderr, identify it as an output file
addOutput(job, job.stderr)

# If job consumes from stdin, identify it as an input file
if job.stdin:
if job.stdin.name not in mapping:
mapping[job.stdin.name] = (set(), set())

mapping[job.stdin.name][0].add(job)


for file in file_used:

if file.name not in mapping:
mapping[file.name] = (set(), set())

if file.link == Link.INPUT:
mapping[file.name][0].add(job)
else:
mapping[file.name][1].add(job)

for file_name, io in mapping.iteritems():

# Go through the mapping and for each file add dependencies between the
# job producing a file and the jobs consuming the file
inputs = io[0]

if len(io[1]) > 0:
output = io[1].pop()

for input in inputs:
try:
self.depends(parent=output, child=input)
except DuplicateError:
pass

super(AutoADAG, self).writeXML(out)