Skip to content

Commit

Permalink
Add parameter to configure health endpoint port
Browse files Browse the repository at this point in the history
Signed-off-by: Tobias Giese <tgiese@nvidia.com>
  • Loading branch information
tobiasgiese committed Sep 24, 2024
1 parent d858501 commit ed0ca13
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 22 deletions.
6 changes: 3 additions & 3 deletions cmd/nfd-master/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ import (

const (
// ProgramName is the canonical name of this program
ProgramName = "nfd-master"
GrpcHealthPort = 8082
ProgramName = "nfd-master"
)

func main() {
Expand Down Expand Up @@ -108,7 +107,6 @@ func main() {
utils.ConfigureGrpcKlog()

// Get new NfdMaster instance
args.GrpcHealthPort = GrpcHealthPort
instance, err := master.NewNfdMaster(master.WithArgs(args))
if err != nil {
klog.ErrorS(err, "failed to initialize NfdMaster instance")
Expand Down Expand Up @@ -149,6 +147,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
" DEPRECATED: will be removed in a future release along with the deprecated gRPC API.")
flagset.IntVar(&args.MetricsPort, "metrics", 8081,
"Port on which to expose metrics.")
flagset.IntVar(&args.GrpcHealthPort, "grpc-health", 8082,
"Port on which to expose the grpc health endpoint.")
flagset.BoolVar(&args.Prune, "prune", false,
"Prune all NFD related attributes from all nodes of the cluster and exit.")
flagset.BoolVar(&args.VerifyNodeName, "verify-node-name", false,
Expand Down
4 changes: 2 additions & 2 deletions cmd/nfd-topology-updater/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ const (
// ProgramName is the canonical name of this program
ProgramName = "nfd-topology-updater"
kubeletSecurePort = 10250
GrpcHealthPort = 8082
)

var DefaultKubeletStateDir = path.Join(string(hostpath.VarDir), "lib", "kubelet")
Expand All @@ -57,7 +56,6 @@ func main() {
utils.ConfigureGrpcKlog()

// Get new TopologyUpdater instance
args.GrpcHealthPort = GrpcHealthPort
instance, err := topology.NewTopologyUpdater(*args, *resourcemonitorArgs)
if err != nil {
klog.ErrorS(err, "failed to initialize topology updater instance")
Expand Down Expand Up @@ -115,6 +113,8 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) {
"Kube config file.")
flagset.IntVar(&args.MetricsPort, "metrics", 8081,
"Port on which to expose metrics.")
flagset.IntVar(&args.GrpcHealthPort, "grpc-health", 8082,
"Port on which to expose the grpc health endpoint.")
flagset.DurationVar(&resourcemonitorArgs.SleepInterval, "sleep-interval", time.Duration(60)*time.Second,
"Time to sleep between CR updates. zero means no CR updates on interval basis. [Default: 60s]")
flagset.StringVar(&resourcemonitorArgs.Namespace, "watch-namespace", "*",
Expand Down
6 changes: 3 additions & 3 deletions cmd/nfd-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ import (

const (
// ProgramName is the canonical name of this program
ProgramName = "nfd-worker"
GrpcHealthPort = 8082
ProgramName = "nfd-worker"
)

func main() {
Expand Down Expand Up @@ -80,7 +79,6 @@ func main() {
utils.ConfigureGrpcKlog()

// Get new NfdWorker instance
args.GrpcHealthPort = GrpcHealthPort
instance, err := worker.NewNfdWorker(worker.WithArgs(args))
if err != nil {
klog.ErrorS(err, "failed to initialize NfdWorker instance")
Expand Down Expand Up @@ -138,6 +136,8 @@ func initFlags(flagset *flag.FlagSet) (*worker.Args, *worker.ConfigOverrideArgs)
"Do not publish feature labels")
flagset.IntVar(&args.MetricsPort, "metrics", 8081,
"Port on which to expose metrics.")
flagset.IntVar(&args.GrpcHealthPort, "grpc-health", 8082,
"Port on which to expose the grpc health endpoint.")
flagset.StringVar(&args.Options, "options", "",
"Specify config options from command line. Config options are specified "+
"in the same format as in the config file (i.e. json or yaml). These options")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ spec:
- "-feature-gates={{ $key }}={{ $value }}"
{{- end }}
- "-metrics={{ .Values.master.metricsPort | default "8081" }}"
- "-grpc-health={{ .Values.master.healthPort | default "8082" }}"
{{- with .Values.master.extraArgs }}
{{- toYaml . | nindent 12 }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ spec:
# Disable kubelet state tracking by giving an empty path
- "-kubelet-state-dir="
{{- end }}
- -metrics={{ .Values.topologyUpdater.metricsPort | default "8081"}}
- "-metrics={{ .Values.topologyUpdater.metricsPort | default "8081"}}"
- "-grpc-health={{ .Values.topologyUpdater.healthPort | default "8082" }}"
{{- with .Values.topologyUpdater.extraArgs }}
{{- toYaml . | nindent 10 }}
{{- end }}
Expand Down
25 changes: 13 additions & 12 deletions deployment/helm/node-feature-discovery/templates/worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,20 @@ spec:
command:
- "nfd-worker"
args:
{{- if not .Values.featureGates.NodeFeatureAPI }}
{{- if not .Values.featureGates.NodeFeatureAPI }}
- "-server={{ include "node-feature-discovery.fullname" . }}-master:{{ .Values.master.service.port }}"
{{- end }}
{{- if .Values.tls.enable }}
{{- end }}
{{- if .Values.tls.enable }}
- "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
- "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
{{- end }}
# Go over featureGate and add the feature-gate flag
{{- range $key, $value := .Values.featureGates }}
{{- end }}
# Go over featureGate and add the feature-gate flag
{{- range $key, $value := .Values.featureGates }}
- "-feature-gates={{ $key }}={{ $value }}"
{{- end }}
{{- end }}
- "-metrics={{ .Values.worker.metricsPort | default "8081"}}"
- "-grpc-health={{ .Values.worker.healthPort | default "8082" }}"
{{- with .Values.gc.extraArgs }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand Down Expand Up @@ -125,11 +126,11 @@ spec:
- name: nfd-worker-conf
mountPath: "/etc/kubernetes/node-feature-discovery"
readOnly: true
{{- if .Values.tls.enable }}
{{- if .Values.tls.enable }}
- name: nfd-worker-cert
mountPath: "/etc/kubernetes/node-feature-discovery/certs"
readOnly: true
{{- end }}
{{- end }}
volumes:
- name: host-boot
hostPath:
Expand Down Expand Up @@ -166,12 +167,12 @@ spec:
items:
- key: nfd-worker.conf
path: nfd-worker.conf
{{- if .Values.tls.enable }}
{{- if .Values.tls.enable }}
- name: nfd-worker-cert
secret:
secretName: nfd-worker-cert
{{- end }}
{{- with .Values.worker.nodeSelector }}
{{- end }}
{{- with .Values.worker.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
Expand Down
3 changes: 3 additions & 0 deletions deployment/helm/node-feature-discovery/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ master:
# be removed with it in a future release
port: 8080
metricsPort: 8081
healthPort: 8082
instance:
featureApi:
resyncPeriod:
Expand Down Expand Up @@ -406,6 +407,7 @@ worker:
### <NFD-WORKER-CONF-END-DO-NOT-REMOVE>

metricsPort: 8081
healthPort: 8082
daemonsetAnnotations: {}
podSecurityContext: {}
# fsGroup: 2000
Expand Down Expand Up @@ -497,6 +499,7 @@ topologyUpdater:
create: true

metricsPort: 8081
healthPort: 8082
kubeletConfigPath:
kubeletPodResourcesSockPath:
updateInterval: 60s
Expand Down
5 changes: 4 additions & 1 deletion docs/deployment/helm.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ API's you need to install the prometheus operator in your cluster.
| `master.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
| `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. **NOTE**: this parameter is related to the deprecated gRPC API and will be removed with it in a future release |
| `master.metricsPort` | integer | 8081 | Port on which to expose metrics from components to prometheus operator |
| `master.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint |
| `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments |
| `master.resyncPeriod` | string | | NFD API controller resync period. |
| `master.extraLabelNs` | array | [] | List of allowed extra label namespaces |
Expand Down Expand Up @@ -217,7 +218,8 @@ API's you need to install the prometheus operator in your cluster.
| `worker.*` | dict | | NFD worker daemonset configuration |
| `worker.enable` | bool | true | Specifies whether nfd-worker should be deployed |
| `worker.hostNetwork` | bool | false | Specifies whether to enable or disable running the container in the host's network namespace |
| `worker.metricsPort*` | int | 8081 | Port on which to expose metrics from components to prometheus operator |
| `worker.metricsPort` | int | 8081 | Port on which to expose metrics from components to prometheus operator |
| `worker.healthPort` | int | 8082 | Port on which to expose the grpc health endpoint |
| `worker.config` | dict | | NFD worker [configuration](../reference/worker-configuration-reference) |
| `worker.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settins |
| `worker.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) |
Expand Down Expand Up @@ -252,6 +254,7 @@ API's you need to install the prometheus operator in your cluster.
| `topologyUpdater.serviceAccount.name` | string | | The name of the service account for topology updater to use. If not set and create is true, a name is generated using the fullname template and `-topology-updater` suffix |
| `topologyUpdater.rbac.create` | bool | true | Specifies whether to create [RBAC][rbac] configuration for topology updater |
| `topologyUpdater.metricsPort` | integer | 8081 | Port on which to expose prometheus metrics |
| `topologyUpdater.healthPort` | integer | 8082 | Port on which to expose the grpc health endpoint |
| `topologyUpdater.kubeletConfigPath` | string | "" | Specifies the kubelet config host path |
| `topologyUpdater.kubeletPodResourcesSockPath` | string | "" | Specifies the kubelet sock path to read pod resources |
| `topologyUpdater.updateInterval` | string | 60s | Time to sleep between CR updates. Non-positive value implies no CR update. |
Expand Down
9 changes: 9 additions & 0 deletions node-feature-discovery.iml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

0 comments on commit ed0ca13

Please sign in to comment.