2 changes: 1 addition & 1 deletion pkg/controller/cluster/client.go
@@ -23,7 +23,7 @@ func newVirtualClient(ctx context.Context, hostClient ctrlruntimeclient.Client,
}

if err := hostClient.Get(ctx, kubeconfigSecretName, &clusterKubeConfig); err != nil {
return nil, fmt.Errorf("failed to get kubeconfig secret: %w", err)
return nil, err
}

restConfig, err := clientcmd.RESTConfigFromKubeConfig(clusterKubeConfig.Data["kubeconfig.yaml"])
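As context for the two return forms shown above, a minimal self-contained sketch (not part of this PR; the sentinel error and function names are invented) of what the %w wrapping changes for callers: the underlying error stays matchable with errors.Is either way, but the wrapped form also carries the context string.

```go
package main

import (
	"errors"
	"fmt"
)

// errNotFound stands in for whatever error hostClient.Get might return;
// the name is hypothetical and only used for this sketch.
var errNotFound = errors.New("secret not found")

func getWrapped() error {
	// Wrapping with %w keeps the underlying error matchable and adds context.
	return fmt.Errorf("failed to get kubeconfig secret: %w", errNotFound)
}

func getBare() error {
	// Returning the error as-is also stays matchable, but drops the context string.
	return errNotFound
}

func main() {
	fmt.Println(errors.Is(getWrapped(), errNotFound)) // true
	fmt.Println(errors.Is(getBare(), errNotFound))    // true
	fmt.Println(getWrapped())                         // failed to get kubeconfig secret: secret not found
}
```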
16 changes: 10 additions & 6 deletions pkg/controller/cluster/server/server.go
@@ -416,9 +416,11 @@ func (s *Server) setupDynamicPersistence() v1.PersistentVolumeClaim {
func (s *Server) setupStartCommand() (string, error) {
var output bytes.Buffer

tmpl := singleServerTemplate
tmpl := StartupCommand

mode := "single"
if *s.cluster.Spec.Servers > 1 {
tmpl = HAServerTemplate
mode = "ha"
}

tmplCmd, err := template.New("").Parse(tmpl)
@@ -427,10 +429,12 @@ func (s *Server) setupStartCommand() (string, error) {
}

if err := tmplCmd.Execute(&output, map[string]string{
"ETCD_DIR": "/var/lib/rancher/k3s/server/db/etcd",
"INIT_CONFIG": "/opt/rancher/k3s/init/config.yaml",
"SERVER_CONFIG": "/opt/rancher/k3s/server/config.yaml",
"EXTRA_ARGS": strings.Join(s.cluster.Spec.ServerArgs, " "),
"ETCD_DIR": "/var/lib/rancher/k3s/server/db/etcd",
"INIT_CONFIG": "/opt/rancher/k3s/init/config.yaml",
"SERVER_CONFIG": "/opt/rancher/k3s/server/config.yaml",
"ORCHESTRATION_MODE": mode,
"K3K_MODE": string(s.cluster.Spec.Mode),
"EXTRA_ARGS": strings.Join(s.cluster.Spec.ServerArgs, " "),
}); err != nil {
return "", err
}
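A hedged sketch of how the updated setupStartCommand data could feed text/template. The key names match the diff, but the template body, function signature, and paths below are simplified stand-ins, and for brevity the single/HA branch is taken inside the Go template here, whereas the PR takes it in the rendered shell script via ORCHESTRATION_MODE.

```go
package main

import (
	"bytes"
	"fmt"
	"strings"
	"text/template"
)

// startupCommand is a toy stand-in for the real StartupCommand template;
// only the branching on ORCHESTRATION_MODE is reproduced.
const startupCommand = `{{if eq .ORCHESTRATION_MODE "ha"}}ha_server{{else}}single_server{{end}} --config {{.SERVER_CONFIG}} {{.EXTRA_ARGS}}`

// renderStartCommand mirrors the logic in the diff: derive "single" or "ha"
// from the desired server count and pass it in the template data map.
func renderStartCommand(servers int, serverArgs []string) (string, error) {
	mode := "single"
	if servers > 1 {
		mode = "ha"
	}

	tmpl, err := template.New("").Parse(startupCommand)
	if err != nil {
		return "", err
	}

	var out bytes.Buffer
	if err := tmpl.Execute(&out, map[string]string{
		"ORCHESTRATION_MODE": mode,
		"SERVER_CONFIG":      "/opt/rancher/k3s/server/config.yaml",
		"EXTRA_ARGS":         strings.Join(serverArgs, " "),
	}); err != nil {
		return "", err
	}

	return out.String(), nil
}

func main() {
	cmd, _ := renderStartCommand(3, []string{"--debug"})
	fmt.Println(cmd) // ha_server --config /opt/rancher/k3s/server/config.yaml --debug
}
```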
107 changes: 95 additions & 12 deletions pkg/controller/cluster/server/template.go
@@ -1,16 +1,99 @@
package server

var singleServerTemplate string = `
if [ -d "{{.ETCD_DIR}}" ]; then
# if directory exists then it means its not an initial run
/bin/k3s server --cluster-reset --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
fi
rm -f /var/lib/rancher/k3s/server/db/reset-flag
/bin/k3s server --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log`

var HAServerTemplate string = `
if [ ${POD_NAME: -1} == 0 ] && [ ! -d "{{.ETCD_DIR}}" ]; then
var StartupCommand string = `
info()
{
echo "[INFO] [$(date +"%c")]" "$@"
}
fatal()
{
echo "[FATAL] [$(date +"%c")] " "$@" >&2
exit 1
}
# safe mode function to reset node IP after pod restarts
safe_mode() {
CURRENT_IP=""
if [ -f /var/lib/rancher/k3s/k3k-node-ip ]; then
CURRENT_IP=$(cat /var/lib/rancher/k3s/k3k-node-ip)
fi
if [ -z "$CURRENT_IP" ] || [ "$CURRENT_IP" = "$POD_IP" ] || [ {{.K3K_MODE}} != "virtual" ]; then
return
fi
# skipping if the node is starting for the first time
if [ -d "{{.ETCD_DIR}}" ]; then
info "Starting K3s in Safe Mode (Network Policy Disabled) to patch Node IP from ${CURRENT_IP} to ${POD_IP}"
/bin/k3s server --disable-network-policy --config $1 {{.EXTRA_ARGS}} > /dev/null 2>&1 &
PID=$!
# Start the loop to wait for the nodeIP to change
info "Waiting for Node IP to update to ${POD_IP}."
count=0
until kubectl get nodes -o wide 2>/dev/null | grep -q "${POD_IP}"; do
if ! kill -0 $PID 2>/dev/null; then
fatal "safe Mode K3s process died unexpectedly!"
fi
sleep 2
count=$((count+1))
if [ $count -gt 60 ]; then
fatal "timed out waiting for node to change IP from $CURRENT_IP to $POD_IP"
fi
done
info "Node IP is set to ${POD_IP} successfully. Stopping Safe Mode process..."
kill $PID
wait $PID 2>/dev/null || true
fi
}
single_server() {
info "Starting single node setup..."
# checking for existing data in single server if found we must perform reset
if [ -d "{{.ETCD_DIR}}" ]; then
info "Existing data found in single node setup. Performing cluster-reset to ensure quorum..."
if ! /bin/k3s server --cluster-reset --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} > /dev/null 2>&1; then
fatal "cluster reset failed!"
fi
info "Cluster reset complete. Removing Reset flag file."
rm -f /var/lib/rancher/k3s/server/db/reset-flag
fi
# entering safe mode to ensure correct NodeIP
safe_mode {{.INIT_CONFIG}}
info "Adding pod IP file."
echo $POD_IP > /var/lib/rancher/k3s/k3k-node-ip
/bin/k3s server --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
else
/bin/k3s server --config {{.SERVER_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
}
ha_server() {
info "Starting pod $POD_NAME in HA node setup"
if [ ${POD_NAME: -1} == 0 ] && [ ! -d "{{.ETCD_DIR}}" ]; then
info "Adding pod IP file."
echo $POD_IP > /var/lib/rancher/k3s/k3k-node-ip
/bin/k3s server --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
else
safe_mode {{.SERVER_CONFIG}}
info "Adding pod IP file."
echo $POD_IP > /var/lib/rancher/k3s/k3k-node-ip
/bin/k3s server --config {{.SERVER_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.info
fi
}
if [ {{.ORCHESTRATION_MODE}} == "ha" ]; then
ha_server
else
single_server
fi`
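To make the HA branch above easier to follow: `${POD_NAME: -1}` expands to the last character of the StatefulSet pod name (its ordinal, for single-digit replica counts), so only pod ...-0 with no existing etcd data takes the INIT_CONFIG bootstrap path; every other pod joins with SERVER_CONFIG after the safe_mode IP check. A small Go restatement of that shell test, assuming this reading of the script (the helper name is invented):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// isBootstrapServer restates the shell test
//
//	[ ${POD_NAME: -1} == 0 ] && [ ! -d "{{.ETCD_DIR}}" ]
//
// the pod whose name ends in 0 and that has no prior etcd data is the one
// that initializes the cluster with the init config.
func isBootstrapServer(podName, etcdDir string) bool {
	if !strings.HasSuffix(podName, "0") {
		return false
	}
	if _, err := os.Stat(etcdDir); err == nil {
		// The etcd directory already exists, so this is a restart, not a bootstrap.
		return false
	}
	return true
}

func main() {
	fmt.Println(isBootstrapServer("k3k-server-0", "/var/lib/rancher/k3s/server/db/etcd")) // true on a fresh node
	fmt.Println(isBootstrapServer("k3k-server-1", "/var/lib/rancher/k3s/server/db/etcd")) // false
}
```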
2 changes: 1 addition & 1 deletion pkg/controller/cluster/statefulset.go
@@ -120,7 +120,7 @@ func (p *StatefulSetReconciler) handleServerPod(ctx context.Context, cluster v1b

if pod.DeletionTimestamp.IsZero() {
if controllerutil.AddFinalizer(pod, etcdPodFinalizerName) {
log.V(1).Info("Server Pod is being deleted. Removing finalizer", "pod", pod.Name, "namespace", pod.Namespace)
log.V(1).Info("Server Pod is being created. Adding finalizer", "pod", pod.Name, "namespace", pod.Namespace)

return p.Client.Update(ctx, pod)
}
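The corrected log message sits inside controller-runtime's usual finalizer pattern: AddFinalizer returns true only when it actually appended the finalizer, so the Update call and the log line fire once while the pod is live, and the mirror image runs on deletion. A hedged sketch of that pattern (not this repository's implementation; the finalizer name is a placeholder):

```go
package example

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// etcdPodFinalizer is a placeholder finalizer name for this sketch.
const etcdPodFinalizer = "example.k3k.io/etcd-pod"

func reconcilePodFinalizer(ctx context.Context, c client.Client, pod *corev1.Pod) error {
	log := ctrl.LoggerFrom(ctx)

	if pod.DeletionTimestamp.IsZero() {
		// Pod is live: ensure the finalizer is present. AddFinalizer returns
		// true only if it changed the object, so Update runs at most once.
		if controllerutil.AddFinalizer(pod, etcdPodFinalizer) {
			log.V(1).Info("Server Pod is being created. Adding finalizer", "pod", pod.Name, "namespace", pod.Namespace)
			return c.Update(ctx, pod)
		}
		return nil
	}

	// Pod is being deleted: perform any cleanup, then drop the finalizer so
	// deletion can complete.
	if controllerutil.RemoveFinalizer(pod, etcdPodFinalizer) {
		log.V(1).Info("Server Pod is being deleted. Removing finalizer", "pod", pod.Name, "namespace", pod.Namespace)
		return c.Update(ctx, pod)
	}
	return nil
}
```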
30 changes: 30 additions & 0 deletions tests/cluster_update_test.go
@@ -127,6 +127,11 @@ var _ = When("a shared mode cluster update its envs", Label(e2eTestLabel), Label
serverPods := listServerPods(ctx, virtualCluster)
g.Expect(len(serverPods)).To(Equal(1))

serverPod := serverPods[0]
_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
g.Expect(cond).NotTo(BeNil())
g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))

serverEnv1, ok := getEnv(&serverPods[0], "TEST_SERVER_ENV_1")
g.Expect(ok).To(BeTrue())
g.Expect(serverEnv1).To(Equal("upgraded"))
@@ -145,6 +150,11 @@ var _ = When("a shared mode cluster update its envs", Label(e2eTestLabel), Label
aPods := listAgentPods(ctx, virtualCluster)
g.Expect(aPods).To(HaveLen(len(nodes.Items)))

agentPod := aPods[0]
_, cond = pod.GetPodCondition(&agentPod.Status, v1.PodReady)
g.Expect(cond).NotTo(BeNil())
g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))

agentEnv1, ok := getEnv(&aPods[0], "TEST_AGENT_ENV_1")
g.Expect(ok).To(BeTrue())
g.Expect(agentEnv1).To(Equal("upgraded"))
@@ -213,6 +223,11 @@ var _ = When("a shared mode cluster update its server args", Label(e2eTestLabel)
sPods := listServerPods(ctx, virtualCluster)
g.Expect(len(sPods)).To(Equal(1))

serverPod := sPods[0]
_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
g.Expect(cond).NotTo(BeNil())
g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))

g.Expect(isArgFound(&sPods[0], "--node-label=test_server=upgraded")).To(BeTrue())
}).
WithPolling(time.Second * 2).
@@ -330,6 +345,11 @@ var _ = When("a virtual mode cluster update its envs", Label(e2eTestLabel), Labe
serverPods := listServerPods(ctx, virtualCluster)
g.Expect(len(serverPods)).To(Equal(1))

serverPod := serverPods[0]
_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
g.Expect(cond).NotTo(BeNil())
g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))

serverEnv1, ok := getEnv(&serverPods[0], "TEST_SERVER_ENV_1")
g.Expect(ok).To(BeTrue())
g.Expect(serverEnv1).To(Equal("upgraded"))
@@ -345,6 +365,11 @@ var _ = When("a virtual mode cluster update its envs", Label(e2eTestLabel), Labe
aPods := listAgentPods(ctx, virtualCluster)
g.Expect(len(aPods)).To(Equal(1))

agentPod := aPods[0]
_, cond = pod.GetPodCondition(&agentPod.Status, v1.PodReady)
g.Expect(cond).NotTo(BeNil())
g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))

agentEnv1, ok := getEnv(&aPods[0], "TEST_AGENT_ENV_1")
g.Expect(ok).To(BeTrue())
g.Expect(agentEnv1).To(Equal("upgraded"))
@@ -416,6 +441,11 @@ var _ = When("a virtual mode cluster update its server args", Label(e2eTestLabel
sPods := listServerPods(ctx, virtualCluster)
g.Expect(len(sPods)).To(Equal(1))

serverPod := sPods[0]
_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
g.Expect(cond).NotTo(BeNil())
g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))

g.Expect(isArgFound(&sPods[0], "--node-label=test_server=upgraded")).To(BeTrue())
}).
WithPolling(time.Second * 2).
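The readiness assertions added throughout these tests all check the same thing: the pod reports a Ready condition whose status is True. A hedged stand-in for the pod.GetPodCondition helper the tests use, written against core/v1 types only (the helper name is invented):

```go
package example

import (
	corev1 "k8s.io/api/core/v1"
)

// isPodReady reports whether the pod has a Ready condition with status True,
// which is what the g.Expect(cond.Status) assertions above verify.
func isPodReady(p *corev1.Pod) bool {
	for _, cond := range p.Status.Conditions {
		if cond.Type == corev1.PodReady {
			return cond.Status == corev1.ConditionTrue
		}
	}
	return false
}
```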
3 changes: 0 additions & 3 deletions tests/common_test.go
@@ -140,9 +140,6 @@ func NewCluster(namespace string) *v1beta1.Cluster {
Persistence: v1beta1.PersistenceConfig{
Type: v1beta1.EphemeralPersistenceMode,
},
ServerArgs: []string{
"--disable-network-policy",
},
},
}
}