Commit

Merge branch 'apache:main' into 684-gen-pkcs12-keystore-without-cacrt

smoldenhauer-ish authored Apr 5, 2024
2 parents 5dcc451 + 1cc55ef commit 953d9aa
Showing 13 changed files with 363 additions and 50 deletions.
7 changes: 7 additions & 0 deletions api/v1beta1/solrcloud_types.go
@@ -1227,6 +1227,13 @@ func (sc *SolrCloud) GetAllSolrPodNames() []string {
if sc.Spec.Replicas != nil {
replicas = int(*sc.Spec.Replicas)
}
if int(sc.Status.Replicas) > replicas {
replicas = int(sc.Status.Replicas)
}
return sc.GetSolrPodNames(replicas)
}

func (sc *SolrCloud) GetSolrPodNames(replicas int) []string {
podNames := make([]string, replicas)
statefulSetName := sc.StatefulSetName()
for i := range podNames {
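The body of GetSolrPodNames is collapsed by the diff viewer. For context, a minimal sketch of the naming behavior it relies on, assuming the standard StatefulSet convention that pods are named `<statefulSetName>-<ordinal>` (the cloud name below is illustrative):

```go
package main

import "fmt"

// solrPodNames mirrors what GetSolrPodNames presumably does for a replica
// count of N: StatefulSet pods are always named "<statefulSetName>-<ordinal>",
// so N replicas map to ordinals 0..N-1.
func solrPodNames(statefulSetName string, replicas int) []string {
	podNames := make([]string, replicas)
	for i := range podNames {
		podNames[i] = fmt.Sprintf("%s-%d", statefulSetName, i)
	}
	return podNames
}

func main() {
	// With Spec.Replicas=3 but Status.Replicas=5 (mid scale-down), the change
	// above uses the larger value, so the still-running pods 3 and 4 are included.
	fmt.Println(solrPodNames("example-solrcloud", 5))
	// [example-solrcloud-0 example-solrcloud-1 example-solrcloud-2 example-solrcloud-3 example-solrcloud-4]
}
```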
22 changes: 19 additions & 3 deletions controllers/solr_cluster_ops_util.go
@@ -151,7 +151,7 @@ func retryNextQueuedClusterOpWithQueue(statefulSet *appsv1.StatefulSet, clusterO
return hasOp, err
}

func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownOpIsQueued bool, podList []corev1.Pod, logger logr.Logger) (clusterOp *SolrClusterOp, retryLaterDuration time.Duration, err error) {
func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownOpIsQueued bool, podList []corev1.Pod, blockReconciliationOfStatefulSet bool, logger logr.Logger) (clusterOp *SolrClusterOp, retryLaterDuration time.Duration, err error) {
desiredPods := int(*instance.Spec.Replicas)
configuredPods := int(*statefulSet.Spec.Replicas)
if desiredPods != configuredPods {
@@ -170,11 +170,26 @@ func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudRec
Metadata: strconv.Itoa(configuredPods - 1),
}
} else if desiredPods > configuredPods && (instance.Spec.Scaling.PopulatePodsOnScaleUp == nil || *instance.Spec.Scaling.PopulatePodsOnScaleUp) {
// We need to wait for all pods to become healthy to scale up in a managed fashion; otherwise
// the balancing will skip some pods.
if len(podList) < configuredPods {
// There are not enough pods; the statefulSet controller has yet to create the previously desired pods.
// Do not start the scale up until these missing pods are created.
return nil, time.Second * 5, nil
}
// If Solr nodes are advertised by their individual node services (through an ingress)
// then make sure that the host aliases are set for all desired pods before starting a scale-up.
// If the host aliases do not already include the soon-to-be created pods, then Solr might not be able to balance
// replicas onto the new hosts.
// We need to make sure that the StatefulSet is updated with these new hostAliases before the scale up occurs.
if instance.UsesIndividualNodeServices() && instance.Spec.SolrAddressability.External.UseExternalAddress {
for _, pod := range podList {
if len(pod.Spec.HostAliases) < desiredPods {
return nil, time.Second * 5, nil
}
}
}

clusterOp = &SolrClusterOp{
Operation: ScaleUpLock,
Metadata: strconv.Itoa(desiredPods),
@@ -349,7 +364,8 @@ func handleManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler
}
operationComplete = true
// Only do a re-balancing for rolling restarts that migrated replicas
if updateMetadata.RequiresReplicaMigration {
// If a scale-up will occur afterwards, skip the re-balancing, because it will occur after the scale-up anyway
if updateMetadata.RequiresReplicaMigration && *instance.Spec.Replicas <= *statefulSet.Spec.Replicas {
nextClusterOp = &SolrClusterOp{
Operation: BalanceReplicasLock,
Metadata: "RollingUpdateComplete",
@@ -371,7 +387,7 @@ func handleManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler
// We won't kill pods that we need the cluster state for, but we can kill the pods that are already not running.
// This is important for scenarios where there is a bad pod config and nothing is running, but we need to do
// a restart to get a working pod config.
state, retryLater, apiError := util.GetNodeReplicaState(ctx, instance, hasReadyPod, logger)
state, retryLater, apiError := util.GetNodeReplicaState(ctx, instance, statefulSet, hasReadyPod, logger)
if apiError != nil {
return false, true, 0, nil, apiError
} else if !retryLater {
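To make the new scale-up gate concrete, here is a self-contained sketch of the hostAliases check added above (IPs and hostnames are made up): every pod that already exists must carry an alias for each of the desired pods before the ScaleUpLock is taken.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// allPodsCoverDesiredHosts reproduces the gate from
// determineScaleClusterOpLockIfNecessary: if any running pod has fewer host
// aliases than the desired pod count, the scale-up is postponed (requeued).
func allPodsCoverDesiredHosts(pods []corev1.Pod, desiredPods int) bool {
	for _, pod := range pods {
		if len(pod.Spec.HostAliases) < desiredPods {
			return false
		}
	}
	return true
}

func main() {
	pod := corev1.Pod{}
	pod.Spec.HostAliases = []corev1.HostAlias{
		{IP: "10.0.0.1", Hostnames: []string{"solr-0.ing.example.com"}},
		{IP: "10.0.0.2", Hostnames: []string{"solr-1.ing.example.com"}},
	}
	// Two aliases but three desired pods: the operator requeues for 5 seconds.
	fmt.Println(allPodsCoverDesiredHosts([]corev1.Pod{pod}, 3)) // false
}
```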
71 changes: 40 additions & 31 deletions controllers/solrcloud_controller.go
@@ -152,6 +152,11 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
hostNameIpMap := make(map[string]string)
// Generate a service for every Node
if instance.UsesIndividualNodeServices() {
// When updating the statefulSet below, the hostNameIpMap is just used to add new IPs or modify existing ones.
// When scaling down, the hostAliases that are no longer found here will not be removed from the hostAliases in the statefulSet pod spec.
// Therefore, it should be ok that we are not reconciling the node services that will be scaled down in the future.
// This is unfortunately the reality, since we don't yet have the statefulSet to determine how many Solr pods are still running;
// we just have Spec.Replicas, which is the requested pod count.
for _, nodeName := range solrNodeNames {
err, ip := r.reconcileNodeService(ctx, logger, instance, nodeName)
if err != nil {
@@ -161,6 +166,7 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
if instance.Spec.SolrAddressability.External.UseExternalAddress {
if ip == "" {
// If we are using this IP in the hostAliases of the statefulSet, it needs to be set for every service before trying to update the statefulSet
// TODO: Make an event here
blockReconciliationOfStatefulSet = true
} else {
hostNameIpMap[instance.AdvertisedNodeHost(nodeName)] = ip
@@ -319,36 +325,6 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
}

extAddressabilityOpts := instance.Spec.SolrAddressability.External
if extAddressabilityOpts != nil && extAddressabilityOpts.Method == solrv1beta1.Ingress {
// Generate Ingress
ingress := util.GenerateIngress(instance, solrNodeNames)

// Check if the Ingress already exists
ingressLogger := logger.WithValues("ingress", ingress.Name)
foundIngress := &netv1.Ingress{}
err = r.Get(ctx, types.NamespacedName{Name: ingress.Name, Namespace: ingress.Namespace}, foundIngress)
if err != nil && errors.IsNotFound(err) {
ingressLogger.Info("Creating Ingress")
if err = controllerutil.SetControllerReference(instance, ingress, r.Scheme); err == nil {
err = r.Create(ctx, ingress)
}
} else if err == nil {
var needsUpdate bool
needsUpdate, err = util.OvertakeControllerRef(instance, foundIngress, r.Scheme)
needsUpdate = util.CopyIngressFields(ingress, foundIngress, ingressLogger) || needsUpdate

// Update the found Ingress and write the result back if there are any changes
if needsUpdate && err == nil {
ingressLogger.Info("Updating Ingress")
err = r.Update(ctx, foundIngress)
}
}
if err != nil {
return requeueOrNot, err
}
}

var statefulSet *appsv1.StatefulSet

if !blockReconciliationOfStatefulSet {
@@ -416,6 +392,39 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
if err != nil {
return requeueOrNot, err
}
if statefulSet != nil && statefulSet.Spec.Replicas != nil {
solrNodeNames = instance.GetSolrPodNames(int(*statefulSet.Spec.Replicas))
}

extAddressabilityOpts := instance.Spec.SolrAddressability.External
if extAddressabilityOpts != nil && extAddressabilityOpts.Method == solrv1beta1.Ingress {
// Generate Ingress
ingress := util.GenerateIngress(instance, solrNodeNames)

// Check if the Ingress already exists
ingressLogger := logger.WithValues("ingress", ingress.Name)
foundIngress := &netv1.Ingress{}
err = r.Get(ctx, types.NamespacedName{Name: ingress.Name, Namespace: ingress.Namespace}, foundIngress)
if err != nil && errors.IsNotFound(err) {
ingressLogger.Info("Creating Ingress")
if err = controllerutil.SetControllerReference(instance, ingress, r.Scheme); err == nil {
err = r.Create(ctx, ingress)
}
} else if err == nil {
var needsUpdate bool
needsUpdate, err = util.OvertakeControllerRef(instance, foundIngress, r.Scheme)
needsUpdate = util.CopyIngressFields(ingress, foundIngress, ingressLogger) || needsUpdate

// Update the found Ingress and write the result back if there are any changes
if needsUpdate && err == nil {
ingressLogger.Info("Updating Ingress")
err = r.Update(ctx, foundIngress)
}
}
if err != nil {
return requeueOrNot, err
}
}

// *********************************************************
// The operations after this require a statefulSet to exist,
@@ -546,7 +555,7 @@ func (r *SolrCloudReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
// a "locked" cluster operation
if clusterOp == nil {
_, scaleDownOpIsQueued := queuedRetryOps[ScaleDownLock]
clusterOp, retryLaterDuration, err = determineScaleClusterOpLockIfNecessary(ctx, r, instance, statefulSet, scaleDownOpIsQueued, podList, logger)
clusterOp, retryLaterDuration, err = determineScaleClusterOpLockIfNecessary(ctx, r, instance, statefulSet, scaleDownOpIsQueued, podList, blockReconciliationOfStatefulSet, logger)

// If the new clusterOperation is an update to a queued clusterOp, just change the operation that is already queued
if clusterOp != nil {
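The net effect of moving the Ingress reconciliation below the StatefulSet reconcile is easiest to see with numbers. A sketch of the fallback the new code implies, under the assumption that per-node Ingress rules should track the pods that actually exist rather than only the requested count:

```go
package main

import "fmt"

// ingressNodeCount picks the replica count used to generate per-node Ingress
// rules: the StatefulSet's configured count when available, falling back to
// the requested count before the StatefulSet exists.
func ingressNodeCount(requestedReplicas int, statefulSetReplicas *int32) int {
	if statefulSetReplicas != nil {
		return int(*statefulSetReplicas)
	}
	return requestedReplicas
}

func main() {
	configured := int32(5) // statefulSet.Spec.Replicas mid scale-down
	// 3 pods requested, 5 still configured: Ingress rules for pods 3 and 4
	// survive until the scale-down actually removes them.
	fmt.Println(ingressNodeCount(3, &configured)) // 5
}
```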
7 changes: 4 additions & 3 deletions controllers/solrcloud_controller_basic_auth_test.go
@@ -20,6 +20,7 @@ package controllers
import (
"context"
"fmt"

solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
"github.com/apache/solr-operator/controllers/util"
. "github.com/onsi/ginkgo/v2"
@@ -289,10 +290,10 @@ func expectBasicAuthConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1bet
g.Expect(basicAuthSecretVolMount.MountPath).To(Equal("/etc/secrets/"+secretName), "Wrong path used to mount Basic Auth volume")

expLivenessProbeCmd := fmt.Sprintf("JAVA_TOOL_OPTIONS=\"-Dbasicauth=$(cat /etc/secrets/%s-solrcloud-basic-auth/username):$(cat /etc/secrets/%s-solrcloud-basic-auth/password) -Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory\" "+
"solr api -get \"http://${SOLR_HOST}:8983%s\"",
"solr api -get \"http://${SOLR_HOST}:8983%s\" 2>&1 | grep -v JAVA_TOOL_OPTIONS",
solrCloud.Name, solrCloud.Name, expLivenessProbePath)
expReadinessProbeCmd := fmt.Sprintf("JAVA_TOOL_OPTIONS=\"-Dbasicauth=$(cat /etc/secrets/%s-solrcloud-basic-auth/username):$(cat /etc/secrets/%s-solrcloud-basic-auth/password) -Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory\" "+
"solr api -get \"http://${SOLR_HOST}:8983%s\"",
"solr api -get \"http://${SOLR_HOST}:8983%s\" 2>&1 | grep -v JAVA_TOOL_OPTIONS",
solrCloud.Name, solrCloud.Name, expReadinessProbePath)

g.Expect(mainContainer.LivenessProbe).To(Not(BeNil()), "main container should have a liveness probe defined")
Expand Down Expand Up @@ -350,7 +351,7 @@ func expectBasicAuthConfigOnPodTemplateWithGomega(g Gomega, solrCloud *solrv1bet

func expectPutSecurityJsonInZkCmd(g Gomega, expInitContainer *corev1.Container) {
g.Expect(expInitContainer).To(Not(BeNil()), "Didn't find the setup-zk InitContainer in the sts!")
expCmd := "ZK_SECURITY_JSON=$(/opt/solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json); " +
expCmd := "ZK_SECURITY_JSON=$(/opt/solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json || echo 'failed-to-get-security.json'); " +
"if [ ${#ZK_SECURITY_JSON} -lt 3 ]; then " +
"echo $SECURITY_JSON > /tmp/security.json; " +
"/opt/solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST} -cmd putfile /security.json /tmp/security.json; echo \"put security.json in ZK\"; fi"
24 changes: 23 additions & 1 deletion controllers/util/common.go
@@ -432,7 +432,29 @@ func CopyPodTemplates(from, to *corev1.PodTemplateSpec, basePath string, logger

if !DeepEqualWithNils(to.Spec.HostAliases, from.Spec.HostAliases) {
requireUpdate = true
to.Spec.HostAliases = from.Spec.HostAliases
if to.Spec.HostAliases == nil {
to.Spec.HostAliases = from.Spec.HostAliases
} else {
// Do not remove aliases that are no longer used.
// This is in case Solr is scaling down and we want to keep the old addresses for future use.
for _, fromAlias := range from.Spec.HostAliases {
found := false
for i, toAlias := range to.Spec.HostAliases {
if fromAlias.Hostnames[0] == toAlias.Hostnames[0] {
found = true
if !DeepEqualWithNils(toAlias, fromAlias) {
requireUpdate = true
to.Spec.HostAliases[i] = fromAlias
break
}
}
}
if !found {
requireUpdate = true
to.Spec.HostAliases = append(to.Spec.HostAliases, fromAlias)
}
}
}
logger.Info("Update required because field changed", "field", basePath+"Spec.HostAliases", "from", to.Spec.HostAliases, "to", from.Spec.HostAliases)
}

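A worked example of the merge semantics above may help (hostnames and IPs are invented): aliases in `from` are added or updated in `to`, while aliases only present in `to` are deliberately retained, so a scale-down does not drop addresses that running Solr nodes may still need to resolve.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// mergeHostAliases is a simplified, standalone version of the update logic in
// CopyPodTemplates: match on the first hostname, overwrite matches, append
// newcomers, and never delete.
func mergeHostAliases(to, from []corev1.HostAlias) []corev1.HostAlias {
	for _, f := range from {
		found := false
		for i, t := range to {
			if len(t.Hostnames) > 0 && len(f.Hostnames) > 0 && t.Hostnames[0] == f.Hostnames[0] {
				found = true
				to[i] = f // update an existing alias in place
			}
		}
		if !found {
			to = append(to, f) // add a brand-new alias
		}
	}
	return to
}

func main() {
	to := []corev1.HostAlias{
		{IP: "10.0.0.9", Hostnames: []string{"solr-0.example.com"}}, // stale IP: updated
		{IP: "10.0.0.3", Hostnames: []string{"solr-2.example.com"}}, // scaled down: retained
	}
	from := []corev1.HostAlias{
		{IP: "10.0.0.1", Hostnames: []string{"solr-0.example.com"}},
		{IP: "10.0.0.2", Hostnames: []string{"solr-1.example.com"}},
	}
	fmt.Println(mergeHostAliases(to, from))
}
```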
7 changes: 5 additions & 2 deletions controllers/util/solr_security_util.go
@@ -238,7 +238,7 @@ func addHostHeaderToProbe(httpGet *corev1.HTTPGetAction, host string) {

func cmdToPutSecurityJsonInZk() string {
scriptsDir := "/opt/solr/server/scripts/cloud-scripts"
cmd := " ZK_SECURITY_JSON=$(%s/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json); "
cmd := " ZK_SECURITY_JSON=$(%s/zkcli.sh -zkhost ${ZK_HOST} -cmd get /security.json || echo 'failed-to-get-security.json'); "
cmd += "if [ ${#ZK_SECURITY_JSON} -lt 3 ]; then echo $SECURITY_JSON > /tmp/security.json; %s/zkcli.sh -zkhost ${ZK_HOST} -cmd putfile /security.json /tmp/security.json; echo \"put security.json in ZK\"; fi"
return fmt.Sprintf(cmd, scriptsDir, scriptsDir)
}
@@ -496,13 +496,16 @@ func useSecureProbe(solrCloud *solr.SolrCloud, probe *corev1.Probe, mountPath st

// Future work - SOLR_TOOL_OPTIONS is only in 9.4.0, use JAVA_TOOL_OPTIONS until that is the minimum supported version
var javaToolOptionsStr string
var javaToolOptionsOutputFilter string
if len(javaToolOptions) > 0 {
javaToolOptionsStr = fmt.Sprintf("JAVA_TOOL_OPTIONS=%q ", strings.Join(javaToolOptions, " "))
javaToolOptionsOutputFilter = " 2>&1 | grep -v JAVA_TOOL_OPTIONS"
} else {
javaToolOptionsStr = ""
javaToolOptionsOutputFilter = ""
}

probeCommand := fmt.Sprintf("%ssolr api -get \"%s://${SOLR_HOST}:%d%s\"", javaToolOptionsStr, solrCloud.UrlScheme(false), probe.HTTPGet.Port.IntVal, probe.HTTPGet.Path)
probeCommand := fmt.Sprintf("%ssolr api -get \"%s://${SOLR_HOST}:%d%s\"%s", javaToolOptionsStr, solrCloud.UrlScheme(false), probe.HTTPGet.Port.IntVal, probe.HTTPGet.Path, javaToolOptionsOutputFilter)
probeCommand = regexp.MustCompile(`\s+`).ReplaceAllString(strings.TrimSpace(probeCommand), " ")

// use an Exec instead of an HTTP GET
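The reason for the grep filter: when JAVA_TOOL_OPTIONS is set, the JVM prints `Picked up JAVA_TOOL_OPTIONS: ...` to stderr, which here would leak the basic-auth credentials into probe output (and, on failures, Kubernetes events). A sketch of the command the change produces, with an illustrative path, port, and credentials:

```go
package main

import "fmt"

func main() {
	// Mirrors the fmt.Sprintf in useSecureProbe; values are illustrative.
	javaToolOptionsStr := `JAVA_TOOL_OPTIONS="-Dbasicauth=admin:hunter2" `
	outputFilter := " 2>&1 | grep -v JAVA_TOOL_OPTIONS"
	probeCommand := fmt.Sprintf("%ssolr api -get %q%s",
		javaToolOptionsStr, "http://${SOLR_HOST}:8983/solr/admin/info/health", outputFilter)
	fmt.Println(probeCommand)
	// JAVA_TOOL_OPTIONS="-Dbasicauth=admin:hunter2" solr api -get "http://${SOLR_HOST}:8983/solr/admin/info/health" 2>&1 | grep -v JAVA_TOOL_OPTIONS
}
```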
14 changes: 12 additions & 2 deletions controllers/util/solr_update_util.go
@@ -24,6 +24,7 @@ import (
"github.com/apache/solr-operator/controllers/util/solr_api"
"github.com/go-logr/logr"
"github.com/robfig/cron/v3"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"net/url"
@@ -119,7 +120,7 @@ func (state NodeReplicaState) PodHasReplicas(cloud *solr.SolrCloud, podName stri
return isInClusterState && contents.replicas > 0
}

func GetNodeReplicaState(ctx context.Context, cloud *solr.SolrCloud, hasReadyPod bool, logger logr.Logger) (state NodeReplicaState, retryLater bool, err error) {
func GetNodeReplicaState(ctx context.Context, cloud *solr.SolrCloud, statefulSet *appsv1.StatefulSet, hasReadyPod bool, logger logr.Logger) (state NodeReplicaState, retryLater bool, err error) {
clusterResp := &solr_api.SolrClusterStatusResponse{}
overseerResp := &solr_api.SolrOverseerStatusResponse{}

@@ -138,7 +139,7 @@ func GetNodeReplicaState(ctx context.Context, cloud *solr.SolrCloud, hasReadyPod
}
}
if err == nil {
state = findSolrNodeContents(clusterResp.ClusterStatus, overseerResp.Leader, GetAllManagedSolrNodeNames(cloud))
state = findSolrNodeContents(clusterResp.ClusterStatus, overseerResp.Leader, GetManagedSolrNodeNames(cloud, int(*statefulSet.Spec.Replicas)))
} else {
logger.Error(err, "Could not fetch cluster state information for cloud")
}
@@ -535,6 +536,15 @@ func GetAllManagedSolrNodeNames(solrCloud *solr.SolrCloud) map[string]bool {
return allNodeNames
}

func GetManagedSolrNodeNames(solrCloud *solr.SolrCloud, currentlyConfiguredPodCount int) map[string]bool {
podNames := solrCloud.GetSolrPodNames(currentlyConfiguredPodCount)
allNodeNames := make(map[string]bool, len(podNames))
for _, podName := range podNames {
allNodeNames[SolrNodeName(solrCloud, podName)] = true
}
return allNodeNames
}

// EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas off of that Pod.
// For updates this will only be called for pods using ephemeral data.
// For scale-down operations, this can be called for pods using ephemeral or persistent data.
3 changes: 3 additions & 0 deletions controllers/util/solr_util.go
@@ -556,6 +556,9 @@ func GenerateStatefulSet(solrCloud *solr.SolrCloud, solrCloudStatus *solr.SolrCl
VolumeClaimTemplates: pvcs,
},
}
if solrCloud.UsesHeadlessService() {
stateful.Spec.Template.Spec.Subdomain = solrCloud.HeadlessServiceName()
}

var imagePullSecrets []corev1.LocalObjectReference

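For readers unfamiliar with pod subdomains: setting `Spec.Template.Spec.Subdomain` to the headless service name gives each Solr pod a fully qualified hostname it can discover itself by. A small sketch of the resulting FQDN shape (service and namespace names are illustrative):

```go
package main

import "fmt"

func main() {
	// With subdomain set to the headless service, a pod's FQDN becomes
	// <pod>.<subdomain>.<namespace>.svc.<cluster-domain>.
	pod, subdomain, namespace := "example-solrcloud-0", "example-solrcloud-headless", "default"
	fmt.Printf("%s.%s.%s.svc.cluster.local\n", pod, subdomain, namespace)
}
```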
20 changes: 20 additions & 0 deletions helm/solr-operator/Chart.yaml
@@ -61,6 +61,12 @@ annotations:
url: https://github.com/apache/solr-operator/issues/624
- name: Github PR
url: https://github.com/apache/solr-operator/pull/648
description: Avoid reset of security.json if get request fails
links:
- name: Github Issue
url: https://github.com/apache/solr-operator/issues/659
- name: Github PR
url: https://github.com/apache/solr-operator/pull/660
- kind: fixed
description: SolrCloud scaling is now safe when using persistent storage with a 'Delete' reclaim policy
links:
@@ -75,6 +81,20 @@ annotations:
url: https://github.com/apache/solr-operator/issues/693
- name: Github PR
url: https://github.com/apache/solr-operator/pull/694
- kind: security
description: Command-based Solr probes no longer echo 'JAVA_TOOL_OPTIONS' values in Kubernetes events.
links:
- name: JIRA Issue
url: https://issues.apache.org/jira/browse/SOLR-17216
- name: Github PR
url: https://github.com/apache/solr-operator/pull/698
- kind: fixed
description: SolrClouds addressed via an Ingress now scale up and down safely.
links:
- name: Github Issue
url: https://github.com/apache/solr-operator/issues/682
- name: Github PR
url: https://github.com/apache/solr-operator/pull/692
artifacthub.io/images: |
- name: solr-operator
image: apache/solr-operator:v0.9.0-prerelease
2 changes: 1 addition & 1 deletion tests/e2e/solrcloud_rolling_upgrade_test.go
@@ -163,7 +163,7 @@ var _ = FDescribe("E2E - SolrCloud - Rolling Upgrades", func() {
}

By("waiting for the balanceReplicas to finish")
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, solrCloud.StatefulSetName(), time.Second*30, time.Second, func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a balanceReplicas lock after balancing is complete.")