Merged
2 changes: 1 addition & 1 deletion pkg/controller/cluster/client.go
@@ -23,7 +23,7 @@ func newVirtualClient(ctx context.Context, hostClient ctrlruntimeclient.Client,
 	}

 	if err := hostClient.Get(ctx, kubeconfigSecretName, &clusterKubeConfig); err != nil {
-		return nil, fmt.Errorf("failed to get kubeconfig secret: %w", err)
+		return nil, err
 	}

 	restConfig, err := clientcmd.RESTConfigFromKubeConfig(clusterKubeConfig.Data["kubeconfig.yaml"])
8 changes: 6 additions & 2 deletions pkg/controller/cluster/server/server.go
@@ -416,9 +416,11 @@ func (s *Server) setupDynamicPersistence() v1.PersistentVolumeClaim {
 func (s *Server) setupStartCommand() (string, error) {
 	var output bytes.Buffer

-	tmpl := singleServerTemplate
+	tmpl := StartupCommand

+	mode := "single"
 	if *s.cluster.Spec.Servers > 1 {
-		tmpl = HAServerTemplate
+		mode = "ha"
 	}

 	tmplCmd, err := template.New("").Parse(tmpl)
@@ -430,6 +432,8 @@ func (s *Server) setupStartCommand() (string, error) {
"ETCD_DIR": "/var/lib/rancher/k3s/server/db/etcd",
"INIT_CONFIG": "/opt/rancher/k3s/init/config.yaml",
"SERVER_CONFIG": "/opt/rancher/k3s/server/config.yaml",
"CLUSTER_MODE": mode,
"K3K_MODE": string(s.cluster.Spec.Mode),
"EXTRA_ARGS": strings.Join(s.cluster.Spec.ServerArgs, " "),
}); err != nil {
return "", err
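For context, here is a minimal sketch (not from the PR) of how the new data keys flow through setupStartCommand's rendering step: template.New("").Parse compiles the script and Execute substitutes CLUSTER_MODE, K3K_MODE, and EXTRA_ARGS. The stub template and values below are illustrative stand-ins; the real template is in template.go below.

package main

import (
	"bytes"
	"fmt"
	"strings"
	"text/template"
)

// Illustrative stand-in for StartupCommand.
const stubTemplate = `case "{{.CLUSTER_MODE}}" in
"ha") echo "ha mode ({{.K3K_MODE}}) {{.EXTRA_ARGS}}" ;;
"single"|*) echo "single mode ({{.K3K_MODE}}) {{.EXTRA_ARGS}}" ;;
esac`

func main() {
	tmplCmd, err := template.New("").Parse(stubTemplate)
	if err != nil {
		panic(err)
	}

	var output bytes.Buffer
	// Same map shape as the diff above; the values here are made up.
	if err := tmplCmd.Execute(&output, map[string]string{
		"CLUSTER_MODE": "ha",
		"K3K_MODE":     "virtual",
		"EXTRA_ARGS":   strings.Join([]string{"--write-kubeconfig-mode=644"}, " "),
	}); err != nil {
		panic(err)
	}

	fmt.Println(output.String())
}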
112 changes: 99 additions & 13 deletions pkg/controller/cluster/server/template.go
@@ -1,16 +1,102 @@
package server

-var singleServerTemplate string = `
-if [ -d "{{.ETCD_DIR}}" ]; then
-# if directory exists then it means its not an initial run
-/bin/k3s server --cluster-reset --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
-fi
-rm -f /var/lib/rancher/k3s/server/db/reset-flag
-/bin/k3s server --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log`
-
-var HAServerTemplate string = `
-if [ ${POD_NAME: -1} == 0 ] && [ ! -d "{{.ETCD_DIR}}" ]; then
+var StartupCommand string = `
+info()
+{
+echo "[INFO] [$(date +"%c")]" "$@"
+}
+
+fatal()
+{
+echo "[FATAL] [$(date +"%c")] " "$@" >&2
+exit 1
+}
+
+# safe mode function to reset node IP after pod restarts
+safe_mode() {
+CURRENT_IP=""
+if [ -f /var/lib/rancher/k3s/k3k-node-ip ]; then
+CURRENT_IP=$(cat /var/lib/rancher/k3s/k3k-node-ip)
+fi
+
+if [ -z "$CURRENT_IP" ] || [ "$CURRENT_IP" = "$POD_IP" ] || [ {{.K3K_MODE}} != "virtual" ]; then
+return
+fi
+
+# skip if the node is starting for the first time
+if [ -d "{{.ETCD_DIR}}" ]; then
+
+info "Starting K3s in Safe Mode (Network Policy Disabled) to patch Node IP from ${CURRENT_IP} to ${POD_IP}"
+/bin/k3s server --disable-network-policy --config $1 {{.EXTRA_ARGS}} > /dev/null 2>&1 &
+PID=$!
+
+# Start the loop to wait for the node IP to change
+info "Waiting for Node IP to update to ${POD_IP}."
+count=0
+until kubectl get nodes -o wide 2>/dev/null | grep -q "${POD_IP}"; do
+if ! kill -0 $PID 2>/dev/null; then
+fatal "safe mode K3s process died unexpectedly!"
+fi
+sleep 2
+count=$((count+1))
+
+if [ $count -gt 60 ]; then
+fatal "timed out waiting for node to change IP from $CURRENT_IP to $POD_IP"
+fi
+done
+
+info "Node IP is set to ${POD_IP} successfully. Stopping Safe Mode process..."
+kill $PID
+wait $PID 2>/dev/null || true
+fi
+}
+
+start_single_node() {
+info "Starting single node setup..."
+
+# check for existing data in single server; if found we must perform a reset
+if [ -d "{{.ETCD_DIR}}" ]; then
+info "Existing data found in single node setup. Performing cluster-reset to ensure quorum..."
+
+if ! /bin/k3s server --cluster-reset --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} > /dev/null 2>&1; then
+fatal "cluster reset failed!"
+fi
+info "Cluster reset complete. Removing reset flag file."
+rm -f /var/lib/rancher/k3s/server/db/reset-flag
+fi
+
+# enter safe mode to ensure the correct node IP
+safe_mode {{.INIT_CONFIG}}
+
+info "Adding pod IP file."
+echo $POD_IP > /var/lib/rancher/k3s/k3k-node-ip
+
 /bin/k3s server --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
-else
-/bin/k3s server --config {{.SERVER_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
-fi`
+}
+
+start_ha_node() {
+info "Starting pod $POD_NAME in HA node setup"
+
+if [ ${POD_NAME: -1} == 0 ] && [ ! -d "{{.ETCD_DIR}}" ]; then
+info "Adding pod IP file."
+echo $POD_IP > /var/lib/rancher/k3s/k3k-node-ip
+
+/bin/k3s server --config {{.INIT_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
+else
+safe_mode {{.SERVER_CONFIG}}
+
+info "Adding pod IP file."
+echo $POD_IP > /var/lib/rancher/k3s/k3k-node-ip
+
+/bin/k3s server --config {{.SERVER_CONFIG}} {{.EXTRA_ARGS}} 2>&1 | tee /var/log/k3s.log
+fi
+}
+
+case "{{.CLUSTER_MODE}}" in
+"ha")
+start_ha_node
+;;
+"single"|*)
+start_single_node
+;;
+esac`
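The script keys off two environment variables: ${POD_NAME: -1} takes the last character of the pod name, so the StatefulSet's ordinal-0 pod runs with the init config, and POD_IP is persisted to /var/lib/rancher/k3s/k3k-node-ip so safe_mode can compare it on the next restart. This diff does not show where those variables are injected; a common way is the downward API, sketched below under that assumption.

package server

import corev1 "k8s.io/api/core/v1"

// downwardAPIEnv is a hypothetical helper showing one way POD_NAME and POD_IP
// could be supplied to the server container; the PR may wire them differently.
func downwardAPIEnv() []corev1.EnvVar {
	return []corev1.EnvVar{
		{
			Name: "POD_NAME",
			ValueFrom: &corev1.EnvVarSource{
				// Resolves to the pod's own name, e.g. "mycluster-server-0".
				FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.name"},
			},
		},
		{
			Name: "POD_IP",
			ValueFrom: &corev1.EnvVarSource{
				// Changes across pod restarts, which is exactly what safe_mode detects.
				FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.podIP"},
			},
		},
	}
}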
2 changes: 1 addition & 1 deletion pkg/controller/cluster/statefulset.go
@@ -120,7 +120,7 @@ func (p *StatefulSetReconciler) handleServerPod(ctx context.Context, cluster v1b

 	if pod.DeletionTimestamp.IsZero() {
 		if controllerutil.AddFinalizer(pod, etcdPodFinalizerName) {
-			log.V(1).Info("Server Pod is being deleted. Removing finalizer", "pod", pod.Name, "namespace", pod.Namespace)
+			log.V(1).Info("Server Pod is being created. Adding finalizer", "pod", pod.Name, "namespace", pod.Namespace)

 			return p.Client.Update(ctx, pod)
 		}
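The one-line fix above corrects a copy-pasted log message: this branch runs while the pod is live (DeletionTimestamp.IsZero()), i.e. when the finalizer is being added, and AddFinalizer returns true only when it actually mutates the object. A minimal sketch of the full add/remove pattern, with an illustrative finalizer name:

package cluster

import (
	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// etcdPodFinalizer is an illustrative name; the diff uses etcdPodFinalizerName.
const etcdPodFinalizer = "k3k.io/etcd-pod-finalizer"

// needsUpdate reports whether the pod's finalizer set changed and must be persisted.
func needsUpdate(pod *corev1.Pod) bool {
	if pod.DeletionTimestamp.IsZero() {
		// Pod is live: ensure the finalizer is present (true only on first add).
		return controllerutil.AddFinalizer(pod, etcdPodFinalizer)
	}
	// Pod is terminating: cleanup would run here, then the finalizer is released.
	return controllerutil.RemoveFinalizer(pod, etcdPodFinalizer)
}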
30 changes: 30 additions & 0 deletions tests/cluster_update_test.go
@@ -127,6 +127,11 @@ var _ = When("a shared mode cluster update its envs", Label(e2eTestLabel), Label
 			serverPods := listServerPods(ctx, virtualCluster)
 			g.Expect(len(serverPods)).To(Equal(1))

+			serverPod := serverPods[0]
+			_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))
+
 			serverEnv1, ok := getEnv(&serverPods[0], "TEST_SERVER_ENV_1")
 			g.Expect(ok).To(BeTrue())
 			g.Expect(serverEnv1).To(Equal("upgraded"))
@@ -145,6 +150,11 @@ var _ = When("a shared mode cluster update its envs", Label(e2eTestLabel), Label
 			aPods := listAgentPods(ctx, virtualCluster)
 			g.Expect(aPods).To(HaveLen(len(nodes.Items)))

+			agentPod := aPods[0]
+			_, cond = pod.GetPodCondition(&agentPod.Status, v1.PodReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))
+
 			agentEnv1, ok := getEnv(&aPods[0], "TEST_AGENT_ENV_1")
 			g.Expect(ok).To(BeTrue())
 			g.Expect(agentEnv1).To(Equal("upgraded"))
@@ -213,6 +223,11 @@ var _ = When("a shared mode cluster update its server args", Label(e2eTestLabel)
 			sPods := listServerPods(ctx, virtualCluster)
 			g.Expect(len(sPods)).To(Equal(1))

+			serverPod := sPods[0]
+			_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))
+
 			g.Expect(isArgFound(&sPods[0], "--node-label=test_server=upgraded")).To(BeTrue())
 		}).
 			WithPolling(time.Second * 2).
@@ -330,6 +345,11 @@ var _ = When("a virtual mode cluster update its envs", Label(e2eTestLabel), Labe
 			serverPods := listServerPods(ctx, virtualCluster)
 			g.Expect(len(serverPods)).To(Equal(1))

+			serverPod := serverPods[0]
+			_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))
+
 			serverEnv1, ok := getEnv(&serverPods[0], "TEST_SERVER_ENV_1")
 			g.Expect(ok).To(BeTrue())
 			g.Expect(serverEnv1).To(Equal("upgraded"))
@@ -345,6 +365,11 @@ var _ = When("a virtual mode cluster update its envs", Label(e2eTestLabel), Labe
 			aPods := listAgentPods(ctx, virtualCluster)
 			g.Expect(len(aPods)).To(Equal(1))

+			agentPod := aPods[0]
+			_, cond = pod.GetPodCondition(&agentPod.Status, v1.PodReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))
+
 			agentEnv1, ok := getEnv(&aPods[0], "TEST_AGENT_ENV_1")
 			g.Expect(ok).To(BeTrue())
 			g.Expect(agentEnv1).To(Equal("upgraded"))
@@ -416,6 +441,11 @@ var _ = When("a virtual mode cluster update its server args", Label(e2eTestLabel
 			sPods := listServerPods(ctx, virtualCluster)
 			g.Expect(len(sPods)).To(Equal(1))

+			serverPod := sPods[0]
+			_, cond := pod.GetPodCondition(&serverPod.Status, v1.PodReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(BeEquivalentTo(metav1.ConditionTrue))
+
 			g.Expect(isArgFound(&sPods[0], "--node-label=test_server=upgraded")).To(BeTrue())
 		}).
 			WithPolling(time.Second * 2).
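The same four-line readiness assertion now appears in each Eventually block above. A small helper along these lines (a sketch, not part of the PR; the package name is illustrative) could collapse the repetition, using the same GetPodCondition call from k8s.io/kubernetes/pkg/api/v1/pod:

package k3k_test

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/api/v1/pod"
)

// isPodReady reports whether the PodReady condition is present and True.
func isPodReady(p *v1.Pod) bool {
	_, cond := pod.GetPodCondition(&p.Status, v1.PodReady)
	return cond != nil && cond.Status == v1.ConditionTrue
}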
3 changes: 0 additions & 3 deletions tests/common_test.go
@@ -140,9 +140,6 @@ func NewCluster(namespace string) *v1beta1.Cluster {
 			Persistence: v1beta1.PersistenceConfig{
 				Type: v1beta1.EphemeralPersistenceMode,
 			},
-			ServerArgs: []string{
-				"--disable-network-policy",
-			},
 		},
 	}
 }