Skip to content

Commit

Permalink
FIX: #412 set Restart flag to true before creation (#413, thanks @fab…
Browse files Browse the repository at this point in the history
  • Loading branch information
fabricev authored Nov 26, 2020
1 parent 3b52e04 commit 2d8d60b
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 3 deletions.
1 change: 1 addition & 0 deletions cmd/node/nodeCreate.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ func parseCreateNodeCmd(cmd *cobra.Command, args []string) ([]*k3d.Node, *k3d.Cl
Labels: map[string]string{
k3d.LabelRole: roleStr,
},
Restart: true,
}
nodes = append(nodes, node)
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func ClusterCreate(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clus

node.Name = generateNodeName(cluster.Name, node.Role, suffix)
node.Network = cluster.Network.Name

node.Restart = true
node.GPURequest = cluster.CreateClusterOpts.GPURequest

// create node
Expand Down Expand Up @@ -343,6 +343,7 @@ func ClusterCreate(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clus
Role: k3d.LoadBalancerRole,
Labels: k3d.DefaultObjectLabels, // TODO: createLoadBalancer: add more expressive labels
Network: cluster.Network.Name,
Restart: true,
}
cluster.Nodes = append(cluster.Nodes, lbNode) // append lbNode to list of cluster nodes, so it will be considered during rollback
log.Infof("Creating LoadBalancer '%s'", lbNode.Name)
Expand Down
11 changes: 9 additions & 2 deletions pkg/cluster/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ func NodeGet(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node) (*k3
return node, nil
}

// NodeWaitForLogMessage follows the logs of a node container and returns if it finds a specific line in there (or timeout is reached)
// NodeWaitForLogMessage follows the logs of a node container and returns if it finds a specific line in there (or timeout is reached)
func NodeWaitForLogMessage(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, message string, since time.Time) error {
for {
select {
Expand Down Expand Up @@ -353,8 +353,15 @@ func NodeWaitForLogMessage(ctx context.Context, runtime runtimes.Runtime, node *
if nRead > 0 && strings.Contains(output, message) {
break
}

// check if the container is restarting
running, status, _ := runtime.GetNodeStatus(ctx, node)
if running && status == k3d.NodeStatusRestarting {
return fmt.Errorf("Node %s is restarting, early exit to avoid crash loop", node.Name)
}

time.Sleep(500 * time.Millisecond) // wait for half a second to avoid overloading docker (error `socket: too many open files`)
}
time.Sleep(500 * time.Millisecond) // wait for half a second to avoid overloading docker (error `socket: too many open files`)
log.Debugf("Finished waiting for log message '%s' from node '%s'", message, node.Name)
return nil
}
5 changes: 5 additions & 0 deletions pkg/runtimes/containerd/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ func (d Containerd) GetNode(ctx context.Context, node *k3d.Node) (*k3d.Node, err
return nil, nil
}

// GetNodeStatus reports whether a node is running together with its status string.
// This containerd variant is a placeholder: it ignores ctx and node and
// unconditionally returns running=true, an empty status and a nil error.
// NOTE(review): presumably real status lookup is not implemented for containerd yet — confirm.
func (d Containerd) GetNodeStatus(ctx context.Context, node *k3d.Node) (bool, string, error) {
	var (
		running = true
		status  string // left empty: no status is looked up here
	)
	return running, status, nil
}

// GetNodeLogs returns the logs from a given node
func (d Containerd) GetNodeLogs(ctx context.Context, node *k3d.Node, since time.Time) (io.ReadCloser, error) {
return nil, nil
Expand Down
1 change: 1 addition & 0 deletions pkg/runtimes/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ type Runtime interface {
DeleteNode(context.Context, *k3d.Node) error
GetNodesByLabel(context.Context, map[string]string) ([]*k3d.Node, error)
GetNode(context.Context, *k3d.Node) (*k3d.Node, error)
GetNodeStatus(context.Context, *k3d.Node) (bool, string, error)
CreateNetworkIfNotPresent(context.Context, string) (string, bool, error) // @return NETWORK_NAME, EXISTS, ERROR
GetKubeconfig(context.Context, *k3d.Node) (io.ReadCloser, error)
DeleteNetwork(context.Context, string) error
Expand Down
3 changes: 3 additions & 0 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ var ReadyLogMessageByRole = map[Role]string{
LoadBalancerRole: "start worker processes",
}

const (
	// NodeStatusRestarting is the status string that signals that a node's
	// container is currently restarting.
	NodeStatusRestarting = "restarting"
)

// Role defines a k3d node role
type Role string

Expand Down

0 comments on commit 2d8d60b

Please sign in to comment.