This repository has been archived by the owner on Jan 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First draft of the NVIDIA Docker plugin
Leverage the Docker volume plugin mechanism introduced with Docker 1.9. This plugin also exports a few REST endpoints to ease remote NVIDIA Docker management. This should address issue #8.
- Loading branch information
Showing
16 changed files
with
1,681 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
|
||
# Build image for the NVIDIA Docker plugin: a Go toolchain plus the CUDA 6.5
# runtime headers/static library and the GDK (NVML) needed by cgo.
FROM golang

# Expected SHA-256 sums and key fingerprint used to verify the downloads below.
ENV NVIDIA_GPGKEY_SUM bd841d59a27a406e513db7d405550894188a4c1cd96bf8aa4f82f1b39e0b5c1c
ENV NVIDIA_GPGKEY_FPR 889bee522da690103c4b085ed88c3d385c37d3be
ENV NVIDIA_GDK_SUM 1e32e58f69fe29ee67b845233e7aa9347f37994463252bccbc8bfc8a7104ab5a

# Fetch the CUDA repository signing key, verify its exported form against the
# pinned checksum, then register the CUDA apt repository.
# NOTE: `sha256sum -c` expects "<digest>  <file>" (digest, space, mode
# character, file name), hence the two spaces in the echoed line.
RUN apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/GPGKEY && \
    apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +2 > cudasign.pub && \
    echo "$NVIDIA_GPGKEY_SUM  cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
    echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64 /" > /etc/apt/sources.list.d/cuda.list

# Install the pinned CUDA 6.5 runtime development packages.
RUN apt-get update && apt-get install -y --no-install-recommends --force-yes \
        cuda-cudart-dev-6-5=6.5-19 cuda-misc-headers-6-5=6.5-19 \
    && rm -rf /var/lib/apt/lists/*

# Rebind memcpy in the static CUDA runtime to the GLIBC_2.2.5 symbol version
# (presumably so the resulting binary also runs against older glibc - confirm).
RUN objcopy --redefine-sym memcpy=memcpy@GLIBC_2.2.5 /usr/local/cuda-6.5/lib64/libcudart_static.a

# Download the GDK installer, verify it against the pinned checksum and run it.
RUN wget -O gdk.run -q http://developer.download.nvidia.com/compute/cuda/7.5/Prod/local_installers/cuda_352_39_gdk_linux.run && \
    echo "$NVIDIA_GDK_SUM  gdk.run" | sha256sum -c --strict - && \
    chmod +x gdk.run && ./gdk.run --silent && rm gdk.run

# Plugin sources go into the GOPATH; built binaries land in the /go/bin volume.
COPY src /go/src
VOLUME /go/bin

# Header and library search paths used by cgo for CUDA and NVML.
ENV CGO_CFLAGS "-I /usr/local/cuda-6.5/include -I /usr/include/nvidia/gdk"
ENV CGO_LDFLAGS "-L /usr/local/cuda-6.5/lib64 -L /usr/src/gdk/nvml/lib -ldl -lrt"

# Build as a user whose UID is supplied via --build-arg UID=<uid>.
ARG UID
RUN useradd --uid $UID build
USER build

CMD go get -v -ldflags="-s" plugin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
|
||
# Directory containing this Makefile; every path below is anchored to it so the
# build does not depend on the directory make is invoked from.
MAKE_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
BIN_DIR := $(MAKE_DIR)/bin
# UID of the invoking user, forwarded to the build image as a build argument.
USER_ID := $(shell id -u)

IMAGE := nvdocker-build
PREFIX := /usr/local/nvidia
TARGET := nvidia-docker-plugin
PLUGIN := $(BIN_DIR)/plugin

.PHONY: all install clean

all : $(PLUGIN)

# Build the plugin inside the build image; the binary is written to BIN_DIR
# through the /go/bin volume mount. The Dockerfile path is anchored to MAKE_DIR,
# matching the build context, instead of being resolved against the current
# working directory.
$(PLUGIN) :
	@docker build --build-arg UID=$(USER_ID) -t $(IMAGE) -f $(MAKE_DIR)/Dockerfile.build $(MAKE_DIR)
	@mkdir -p $(BIN_DIR)
	@docker run --rm -v $(BIN_DIR):/go/bin $(IMAGE)

install: all
	install -D -T -m 755 $(PLUGIN) $(PREFIX)/$(TARGET)

# Remove the build image, the golang base image and the output directory.
# Failures of `docker rmi` are ignored.
clean :
	-@docker rmi -f $(IMAGE) golang 2> /dev/null
	@rm -rf $(BIN_DIR)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
// Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
|
||
package cuda | ||
|
||
// #cgo LDFLAGS: -lcudart_static | ||
// #include <stdlib.h> | ||
// #include <cuda_runtime_api.h> | ||
import "C" | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"unsafe" | ||
) | ||
|
||
// MemoryInfo summarizes the memory characteristics of a CUDA device as
// derived from its cudaDeviceProp structure.
type MemoryInfo struct {
	// ECC reports whether the device properties flag ECC as enabled.
	ECC bool
	// Global is totalGlobalMem scaled down by 1024*1024.
	Global uint
	// Shared is sharedMemPerMultiprocessor scaled down by 1024
	// (includes L1 cache).
	Shared uint
	// Constant is totalConstMem scaled down by 1024.
	Constant uint
	// L2Cache is l2CacheSize scaled down by 1024.
	L2Cache uint
	// Bandwidth is an estimate computed from the memory clock rate and the
	// memory bus width (presumably GB/s - confirm units).
	Bandwidth uint
}
|
||
type Device struct { | ||
handle C.int | ||
|
||
Gen string | ||
Arch string | ||
Cores uint | ||
Memory MemoryInfo | ||
} | ||
|
||
func cudaErr(ret C.cudaError_t) error { | ||
if ret == C.cudaSuccess { | ||
return nil | ||
} | ||
err := C.GoString(C.cudaGetErrorString(ret)) | ||
return errors.New(err) | ||
} | ||
|
||
var (
	// archToGen maps the major compute capability to the name of the
	// corresponding GPU generation.
	archToGen = map[string]string{
		"1": "Tesla",
		"2": "Fermi",
		"3": "Kepler",
		"5": "Maxwell",
	}

	// archToCoresPerSM maps a "major.minor" compute capability to the number
	// of cores per streaming multiprocessor.
	archToCoresPerSM = map[string]uint{
		// Tesla generation
		"1.0": 8, // SM 1.0, G80 class
		"1.1": 8, // SM 1.1, G8x G9x class
		"1.2": 8, // SM 1.2, GT21x class
		"1.3": 8, // SM 1.3, GT20x class
		// Fermi generation
		"2.0": 32, // SM 2.0, GF100 GF110 class
		"2.1": 48, // SM 2.1, GF10x GF11x class
		// Kepler generation
		"3.0": 192, // SM 3.0, GK10x class
		"3.2": 192, // SM 3.2, TK1 class
		"3.5": 192, // SM 3.5, GK11x GK20x class
		"3.7": 192, // SM 3.7, GK21x class
		// Maxwell generation
		"5.0": 128, // SM 5.0, GM10x class
		"5.2": 128, // SM 5.2, GM20x class
		"5.3": 128, // SM 5.3, TX1 class
	}
)
|
||
func GetDriverVersion() (string, error) { | ||
var driver C.int | ||
|
||
err := cudaErr(C.cudaDriverGetVersion(&driver)) | ||
d := fmt.Sprintf("%d.%d", int(driver)/1000, int(driver)%100/10) | ||
return d, err | ||
} | ||
|
||
func NewDevice(busID string) (*Device, error) { | ||
var ( | ||
dev C.int | ||
prop C.struct_cudaDeviceProp | ||
) | ||
|
||
id := C.CString(busID) | ||
if err := cudaErr(C.cudaDeviceGetByPCIBusId(&dev, id)); err != nil { | ||
return nil, err | ||
} | ||
C.free(unsafe.Pointer(id)) | ||
|
||
if err := cudaErr(C.cudaGetDeviceProperties(&prop, dev)); err != nil { | ||
return nil, err | ||
} | ||
arch := fmt.Sprintf("%d.%d", prop.major, prop.minor) | ||
cores, ok := archToCoresPerSM[arch] | ||
if !ok { | ||
return nil, fmt.Errorf("unsupported CUDA arch: %s", arch) | ||
} | ||
|
||
// Destroy the active CUDA context | ||
cudaErr(C.cudaDeviceReset()) | ||
|
||
return &Device{ | ||
handle: dev, | ||
Gen: archToGen[arch[:1]], | ||
Arch: arch, | ||
Cores: cores * uint(prop.multiProcessorCount), | ||
Memory: MemoryInfo{ | ||
ECC: bool(prop.ECCEnabled != 0), | ||
Global: uint(prop.totalGlobalMem / (1024 * 1024)), | ||
Shared: uint(prop.sharedMemPerMultiprocessor / 1024), | ||
Constant: uint(prop.totalConstMem / 1024), | ||
L2Cache: uint(prop.l2CacheSize / 1024), | ||
Bandwidth: 2 * uint((prop.memoryClockRate/1000)*(prop.memoryBusWidth/8)) / 1000, | ||
}, | ||
}, nil | ||
} | ||
|
||
func CanAccessPeer(dev1, dev2 *Device) (bool, error) { | ||
var ok C.int | ||
|
||
err := cudaErr(C.cudaDeviceCanAccessPeer(&ok, dev1.handle, dev2.handle)) | ||
return (ok != 0), err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. | ||
|
||
package graceful | ||
|
||
import ( | ||
"net" | ||
"net/http" | ||
"sync" | ||
"time" | ||
|
||
middleware "github.com/justinas/alice" | ||
"gopkg.in/tylerb/graceful.v1" | ||
) | ||
|
||
// timeout is the single duration used throughout this package: the graceful
// shutdown timeout as well as the HTTP read and write timeouts.
const timeout = 5 * time.Second
|
||
type HTTPServer struct { | ||
sync.Mutex | ||
|
||
network string | ||
router *http.ServeMux | ||
server *graceful.Server | ||
err error | ||
} | ||
|
||
// recovery is a middleware that wraps handler so that a panic raised while
// serving a request is recovered and answered with a 500 status code instead
// of propagating further.
func recovery(handler http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		defer func() {
			if p := recover(); p != nil {
				w.WriteHeader(http.StatusInternalServerError)
			}
		}()
		handler.ServeHTTP(w, r)
	})
}
|
||
func NewHTTPServer(net, addr string, mw ...middleware.Constructor) *HTTPServer { | ||
r := http.NewServeMux() | ||
|
||
return &HTTPServer{ | ||
network: net, | ||
router: r, | ||
server: &graceful.Server{ | ||
Timeout: timeout, | ||
Server: &http.Server{ | ||
Addr: addr, | ||
Handler: middleware.New(recovery).Append(mw...).Then(r), | ||
ReadTimeout: timeout, | ||
WriteTimeout: timeout, | ||
}, | ||
}, | ||
} | ||
} | ||
|
||
func (s *HTTPServer) Handle(method, route string, handler http.HandlerFunc) { | ||
f := func(w http.ResponseWriter, r *http.Request) { | ||
if r.Method != method { | ||
http.NotFound(w, r) | ||
return | ||
} | ||
handler.ServeHTTP(w, r) | ||
} | ||
s.router.HandleFunc(route, f) | ||
} | ||
|
||
func (s *HTTPServer) Serve() <-chan struct{} { | ||
l, err := net.Listen(s.network, s.server.Addr) | ||
if err != nil { | ||
s.Lock() | ||
s.err = err | ||
s.Unlock() | ||
c := make(chan struct{}) | ||
close(c) | ||
return c | ||
} | ||
|
||
c := s.server.StopChan() | ||
go func() { | ||
s.Lock() | ||
defer s.Unlock() | ||
|
||
err = s.server.Serve(l) | ||
if e, ok := err.(*net.OpError); !ok || (ok && e.Op != "accept") { | ||
s.err = err | ||
} | ||
}() | ||
return c | ||
} | ||
|
||
func (s *HTTPServer) Stop() { | ||
s.server.Stop(timeout) | ||
} | ||
|
||
func (s *HTTPServer) Error() error { | ||
s.Lock() | ||
defer s.Unlock() | ||
|
||
return s.err | ||
} |
Oops, something went wrong.