Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Forward port #19895 #19903

Merged
merged 2 commits into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 1 addition & 14 deletions ci/Jenkinsfile_utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -112,20 +112,7 @@ def get_git_commit_hash() {
}

/**
 * Publish test coverage by piping the Codecov bash helper into bash.
 *
 * Takes no parameters. Reads env.CHANGE_ID, env.CHANGE_TARGET and env.BRANCH_NAME,
 * and obtains the commit to report against from get_git_commit_hash().
 *
 * NOTE(review): this text appears to come from a rendered diff without +/- markers,
 * so the two `sh` upload steps below presumably belong to different revisions of
 * this function - confirm against the real file before relying on either one.
 */
def publish_test_coverage() {
// Codecov's auto-detection has trouble with our CI's PR validation due to the merging strategy,
// so the -B, -C and (for PRs) -P arguments are assembled explicitly below.
git_commit_hash = get_git_commit_hash()

if (env.CHANGE_ID) {
// PR execution: pass the PR's target branch as -B and the change id as -P
codecovArgs = "-B ${env.CHANGE_TARGET} -C ${git_commit_hash} -P ${env.CHANGE_ID}"
} else {
// Branch execution: pass the branch name as -B; no -P argument
codecovArgs = "-B ${env.BRANCH_NAME} -C ${git_commit_hash}"
}

// To make sure we never fail because test coverage reporting is not available:
// 1. try the public helper from codecov.io (curl retries up to 10 times),
// 2. fall back to our own copy of the bash helper if the public version failed,
// 3. finish with `|| true` so the step's exit status is always success.
sh "(curl --retry 10 -s https://codecov.io/bash | bash -s - ${codecovArgs}) || (curl --retry 10 -s https://s3-us-west-2.amazonaws.com/mxnet-ci-prod-slave-data/codecov-bash.txt | bash -s - ${codecovArgs}) || true"
// Runs the public helper with no explicit arguments; unlike the command above,
// this one has no curl retries, no fallback copy and no `|| true`.
sh "curl -s https://codecov.io/bash | bash"
}

def collect_test_results_unix(original_file_name, new_file_name) {
Expand Down
85 changes: 29 additions & 56 deletions ci/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

import yaml

from safe_docker_run import SafeDockerClient
from util import *


Expand Down Expand Up @@ -104,8 +103,7 @@ def default_ccache_dir() -> str:
return os.path.join(os.path.expanduser("~"), ".ccache")


def container_run(docker_client: SafeDockerClient,
platform: str,
def container_run(platform: str,
nvidia_runtime: bool,
docker_registry: str,
shared_memory_size: str,
Expand All @@ -114,17 +112,12 @@ def container_run(docker_client: SafeDockerClient,
environment: Dict[str, str],
dry_run: bool = False) -> int:
"""Run command in a container"""
container_wait_s = 600
#
# Environment setup
#
# set default environment variables
environment.update({
'CCACHE_MAXSIZE': '500G',
'CCACHE_TEMPDIR': '/tmp/ccache', # temp dir should be local and not shared
'CCACHE_DIR': '/work/ccache', # this path is inside the container as /work/ccache is
# mounted
'CCACHE_LOGFILE': '/tmp/ccache.log', # a container-scoped log, useful for ccache
# verification.
'CCACHE_DIR': '/work/ccache', # this path is inside the container as /work/ccache is mounted
'CCACHE_LOGFILE': '/tmp/ccache.log', # a container-scoped log, useful for ccache verification.
})
environment.update({k: os.environ[k] for k in ['CCACHE_MAXSIZE'] if k in os.environ})

Expand All @@ -136,13 +129,9 @@ def container_run(docker_client: SafeDockerClient,
os.makedirs(local_ccache_dir, exist_ok=True)
logging.info("Using ccache directory: %s", local_ccache_dir)

# Equivalent command
docker_cmd_list = [
"docker",
'run',
"--gpus all" if nvidia_runtime else "",
"--cap-add",
"SYS_PTRACE", # Required by ASAN
# Build docker command
docker_arg_list = [
"--cap-add", "SYS_PTRACE", # Required by ASAN
'--rm',
'--shm-size={}'.format(shared_memory_size),
# mount mxnet root
Expand All @@ -158,40 +147,27 @@ def container_run(docker_client: SafeDockerClient,
'-e', "CCACHE_DIR={}".format(environment['CCACHE_DIR']),
# a container-scoped log, useful for ccache verification.
'-e', "CCACHE_LOGFILE={}".format(environment['CCACHE_LOGFILE']),
'-ti',
tag]
docker_cmd_list.extend(command)
docker_cmd = ' \\\n\t'.join(docker_cmd_list)
logging.info("Running %s in container %s", command, tag)
logging.info("Executing the equivalent of:\n%s\n", docker_cmd)
]
docker_arg_list += [tag]
docker_arg_list.extend(command)

def docker_run_cmd(cmd):
logging.info("Running %s in container %s", command, tag)
logging.info("Executing command:\n%s\n", ' \\\n\t'.join(cmd))
subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr, check=True)

if not dry_run:
#############################
#
signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM})
# noinspection PyShadowingNames
runtime = None
if nvidia_runtime:
# noinspection PyShadowingNames
# runc is default (docker info | grep -i runtime)
runtime = 'nvidia'

return docker_client.run(
tag,
runtime=runtime,
command=command,
shm_size=shared_memory_size,
user='{}:{}'.format(os.getuid(), os.getgid()),
cap_add='SYS_PTRACE',
volumes={
mx_root:
{'bind': '/work/mxnet', 'mode': 'rw'},
local_build_folder:
{'bind': '/work/build', 'mode': 'rw'},
local_ccache_dir:
{'bind': '/work/ccache', 'mode': 'rw'},
},
environment=environment)
if not nvidia_runtime:
docker_run_cmd(['docker', 'run'] + docker_arg_list)
else:
try:
docker_run_cmd(['docker', 'run', '--gpus', 'all'] + docker_arg_list)
except subprocess.CalledProcessError as e:
if e.returncode == 125:
docker_run_cmd(['docker', 'run', '--runtime', 'nvidia'] + docker_arg_list)
else:
raise

return 0


Expand Down Expand Up @@ -292,8 +268,6 @@ def main() -> int:
args = parser.parse_args()

command = list(chain.from_iterable(args.command))
docker_client = SafeDockerClient()

environment = dict([(e.split('=')[:2] if '=' in e else (e, os.environ[e]))
for e in args.environment])

Expand All @@ -318,29 +292,28 @@ def main() -> int:
ret = 0
if command:
ret = container_run(
docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)
elif args.print_docker_run:
command = []
ret = container_run(
docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, dry_run=True, environment=environment)
else:
# With no commands, execute a build function for the target platform
command = ["/work/mxnet/ci/docker/runtime_functions.sh", "build_{}".format(platform)]
logging.info("No command specified, trying default build: %s", ' '.join(command))
ret = container_run(
docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
platform=platform, nvidia_runtime=args.nvidiadocker,
shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
local_ccache_dir=args.ccache_dir, environment=environment)

if ret != 0:
logging.critical("Execution of %s failed with status: %d", command, ret)
return ret


else:
parser.print_help()
list_platforms()
Expand Down
Loading