bindy/reconcilers/bind9cluster.rs

// Copyright (c) 2025 Erick Bourgeois, firestoned
// SPDX-License-Identifier: MIT

//! BIND9 cluster reconciliation logic.
//!
//! This module handles the lifecycle of BIND9 cluster resources in Kubernetes.
//! It manages the `Bind9Instance` resources that belong to a cluster and updates
//! the cluster status to reflect the overall health.

use crate::constants::{API_GROUP_VERSION, KIND_BIND9_CLUSTER, KIND_BIND9_INSTANCE};
use crate::crd::{
    Bind9Cluster, Bind9ClusterStatus, Bind9Instance, Bind9InstanceSpec, Condition, ServerRole,
};
use crate::labels::{
    BINDY_CLUSTER_LABEL, BINDY_INSTANCE_INDEX_ANNOTATION, BINDY_MANAGED_BY_LABEL,
    BINDY_RECONCILE_TRIGGER_ANNOTATION, BINDY_ROLE_LABEL, FINALIZER_BIND9_CLUSTER, K8S_PART_OF,
    MANAGED_BY_BIND9_CLUSTER, PART_OF_BINDY, ROLE_PRIMARY, ROLE_SECONDARY,
};
use crate::reconcilers::finalizers::{ensure_finalizer, handle_deletion, FinalizerCleanup};
use crate::status_reasons::{
    bind9_instance_condition_type, CONDITION_TYPE_READY, REASON_ALL_READY, REASON_NOT_READY,
    REASON_NO_CHILDREN, REASON_PARTIALLY_READY, REASON_READY,
};
use anyhow::Result;
use chrono::Utc;
use k8s_openapi::{
    api::{
        apps::v1::Deployment,
        core::v1::{ConfigMap, Secret, Service},
    },
    apimachinery::pkg::apis::meta::v1::ObjectMeta,
};
use kube::{
    api::{DeleteParams, ListParams, Patch, PatchParams, PostParams},
    client::Client,
    Api, ResourceExt,
};
use serde_json::json;
use std::collections::BTreeMap;
use tracing::{debug, error, info, warn};

/// Implement cleanup trait for `Bind9Cluster` finalizer management
#[async_trait::async_trait]
impl FinalizerCleanup for Bind9Cluster {
    async fn cleanup(&self, client: &Client) -> Result<()> {
        let namespace = self.namespace().unwrap_or_default();
        let name = self.name_any();
        delete_cluster_instances(client, &namespace, &name).await
    }
}

/// Reconciles a `Bind9Cluster` resource.
///
/// This function:
/// 1. Checks if the cluster is being deleted and handles cleanup
/// 2. Adds finalizer if not present
/// 3. Creates/updates cluster `ConfigMap`
/// 4. Reconciles managed instances
/// 5. Updates cluster status based on instance health
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `cluster` - The `Bind9Cluster` resource to reconcile
///
/// # Returns
///
/// * `Ok(())` - If reconciliation succeeded
/// * `Err(_)` - If any reconciliation step failed
///
/// # Errors
///
/// Returns an error if a Kubernetes API operation or the status update fails.
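///
/// # Example
///
/// A minimal invocation sketch (illustrative only: the namespace and cluster
/// name are hypothetical, and in practice this is driven by the controller's
/// watch loop rather than called by hand):
///
/// ```ignore
/// let client = kube::Client::try_default().await?;
/// let clusters: Api<Bind9Cluster> = Api::namespaced(client.clone(), "dns");
/// let cluster = clusters.get("example-cluster").await?;
/// reconcile_bind9cluster(client, cluster).await?;
/// ```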
pub async fn reconcile_bind9cluster(client: Client, cluster: Bind9Cluster) -> Result<()> {
    let namespace = cluster.namespace().unwrap_or_default();
    let name = cluster.name_any();

    info!("Reconciling Bind9Cluster: {}/{}", namespace, name);
    debug!(
        namespace = %namespace,
        name = %name,
        generation = ?cluster.metadata.generation,
        "Starting Bind9Cluster reconciliation"
    );

    // Handle deletion if cluster is being deleted
    if cluster.metadata.deletion_timestamp.is_some() {
        return handle_deletion(&client, &cluster, FINALIZER_BIND9_CLUSTER).await;
    }

    // Ensure finalizer is present
    ensure_finalizer(&client, &cluster, FINALIZER_BIND9_CLUSTER).await?;

    // Check if spec has changed using the standard generation check
    let current_generation = cluster.metadata.generation;
    let observed_generation = cluster.status.as_ref().and_then(|s| s.observed_generation);

    // Only reconcile spec-related resources if spec changed
    let spec_changed =
        crate::reconcilers::should_reconcile(current_generation, observed_generation);

    if spec_changed {
        debug!(
            "Reconciliation needed: current_generation={:?}, observed_generation={:?}",
            current_generation, observed_generation
        );

        // Create or update shared cluster ConfigMap
        create_or_update_cluster_configmap(&client, &cluster).await?;

        // Reconcile managed instances (create/update as needed)
        reconcile_managed_instances(&client, &cluster).await?;
    } else {
        debug!(
            "Spec unchanged (generation={:?}), skipping cluster resource updates",
            current_generation
        );
    }

    // ALWAYS list and analyze cluster instances to update status
    // This ensures status reflects current instance health even when spec hasn't changed
    let instances: Vec<Bind9Instance> =
        list_cluster_instances(&client, &cluster, &namespace, &name).await?;

    // Calculate cluster status from instances
    let (instance_count, ready_instances, instance_names, conditions) =
        calculate_cluster_status(&instances, &namespace, &name);

    // Update cluster status with all conditions
    update_status(
        &client,
        &cluster,
        conditions,
        instance_count,
        ready_instances,
        instance_names,
    )
    .await?;

    Ok(())
}

/// List all `Bind9Instance` resources that reference a cluster
///
/// Filters instances in the namespace that have `clusterRef` matching the cluster name.
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `cluster` - The `Bind9Cluster` to find instances for
/// * `namespace` - Cluster namespace
/// * `name` - Cluster name
///
/// # Returns
///
/// Vector of `Bind9Instance` resources that reference this cluster
///
/// # Errors
///
/// Returns an error if:
/// - Failed to list instances
/// - Failed to update cluster status on error
async fn list_cluster_instances(
    client: &Client,
    cluster: &Bind9Cluster,
    namespace: &str,
    name: &str,
) -> Result<Vec<Bind9Instance>> {
    // List all Bind9Instance resources in the namespace that reference this cluster
    let instances_api: Api<Bind9Instance> = Api::namespaced(client.clone(), namespace);
    let list_params = ListParams::default();
    debug!(namespace = %namespace, "Listing Bind9Instance resources");

    match instances_api.list(&list_params).await {
        Ok(list) => {
            debug!(
                total_instances_in_ns = list.items.len(),
                "Listed Bind9Instance resources"
            );
            // Filter instances that reference this cluster
            let filtered: Vec<_> = list
                .items
                .into_iter()
                .filter(|instance| instance.spec.cluster_ref == name)
                .collect();
            debug!(
                filtered_instances = filtered.len(),
                cluster_ref = %name,
                "Filtered instances by cluster reference"
            );
            Ok(filtered)
        }
        Err(e) => {
            error!(
                "Failed to list Bind9Instance resources for cluster {}/{}: {}",
                namespace, name, e
            );

            // Update status to show error
            let error_condition = Condition {
                r#type: CONDITION_TYPE_READY.to_string(),
                status: "False".to_string(),
                reason: Some(REASON_NOT_READY.to_string()),
                message: Some(format!("Failed to list instances: {e}")),
                last_transition_time: Some(Utc::now().to_rfc3339()),
            };
            update_status(client, cluster, vec![error_condition], 0, 0, vec![]).await?;

            Err(e.into())
        }
    }
}

/// Calculate cluster status from instance health
///
/// Analyzes instance list to determine cluster readiness.
///
/// # Arguments
///
/// * `instances` - List of `Bind9Instance` resources for the cluster
/// * `namespace` - Cluster namespace (for logging)
/// * `name` - Cluster name (for logging)
///
/// # Returns
///
/// Tuple of:
/// - `instance_count` - Total number of instances
/// - `ready_instances` - Number of ready instances
/// - `instance_names` - Names of all instances
/// - `conditions` - The encompassing `Ready` condition followed by one
///   per-instance condition
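///
/// # Example
///
/// An illustrative sketch of the no-instances path (the namespace and
/// cluster name here are hypothetical):
///
/// ```ignore
/// let (count, ready, names, conditions) = calculate_cluster_status(&[], "dns", "demo");
/// assert_eq!((count, ready), (0, 0));
/// assert!(names.is_empty());
/// // A single Ready=False condition with reason REASON_NO_CHILDREN is emitted.
/// assert_eq!(conditions.len(), 1);
/// ```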
#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
pub(crate) fn calculate_cluster_status(
    instances: &[Bind9Instance],
    namespace: &str,
    name: &str,
) -> (i32, i32, Vec<String>, Vec<Condition>) {
    // Count total instances and ready instances
    let instance_count = instances.len() as i32;
    let instance_names: Vec<String> = instances.iter().map(ResourceExt::name_any).collect();

    let ready_instances = instances
        .iter()
        .filter(|instance| {
            instance
                .status
                .as_ref()
                .and_then(|status| status.conditions.first())
                .is_some_and(|condition| condition.r#type == CONDITION_TYPE_READY && condition.status == "True")
        })
        .count() as i32;

    info!(
        "Bind9Cluster {}/{} has {} instances, {} ready",
        namespace, name, instance_count, ready_instances
    );

    // Create instance-level conditions
    let mut instance_conditions = Vec::new();
    for (index, instance) in instances.iter().enumerate() {
        let instance_name = instance.name_any();
        let is_instance_ready = instance
            .status
            .as_ref()
            .and_then(|status| status.conditions.first())
            .is_some_and(|condition| condition.r#type == CONDITION_TYPE_READY && condition.status == "True");

        let (status, reason, message) = if is_instance_ready {
            (
                "True",
                REASON_READY,
                format!("Instance {instance_name} is ready"),
            )
        } else {
            (
                "False",
                REASON_NOT_READY,
                format!("Instance {instance_name} is not ready"),
            )
        };

        instance_conditions.push(Condition {
            r#type: bind9_instance_condition_type(index),
            status: status.to_string(),
            reason: Some(reason.to_string()),
            message: Some(message),
            last_transition_time: Some(Utc::now().to_rfc3339()),
        });
    }

    // Create encompassing Ready condition
    let (encompassing_status, encompassing_reason, encompassing_message) = if instance_count == 0 {
        debug!("No instances found for cluster");
        (
            "False",
            REASON_NO_CHILDREN,
            "No instances found for this cluster".to_string(),
        )
    } else if ready_instances == instance_count {
        debug!("All instances ready");
        (
            "True",
            REASON_ALL_READY,
            format!("All {instance_count} instances are ready"),
        )
    } else if ready_instances > 0 {
        debug!(ready_instances, instance_count, "Cluster progressing");
        (
            "False",
            REASON_PARTIALLY_READY,
            format!("{ready_instances}/{instance_count} instances are ready"),
        )
    } else {
        debug!("Waiting for instances to become ready");
        (
            "False",
            REASON_NOT_READY,
            "No instances are ready".to_string(),
        )
    };

    let encompassing_condition = Condition {
        r#type: CONDITION_TYPE_READY.to_string(),
        status: encompassing_status.to_string(),
        reason: Some(encompassing_reason.to_string()),
        message: Some(encompassing_message.clone()),
        last_transition_time: Some(Utc::now().to_rfc3339()),
    };

    // Combine encompassing condition + instance-level conditions
    let mut all_conditions = vec![encompassing_condition];
    all_conditions.extend(instance_conditions);

    debug!(
        status = %encompassing_status,
        message = %encompassing_message,
        num_conditions = all_conditions.len(),
        "Determined cluster status"
    );

    (
        instance_count,
        ready_instances,
        instance_names,
        all_conditions,
    )
}

/// Update the status of a `Bind9Cluster` with multiple conditions
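///
/// Skips the API call when counts, instance names, and all condition fields
/// (ignoring `last_transition_time`) are unchanged; otherwise merge-patches
/// the `status` subresource. A sketch of the patch body (field casing assumes
/// the CRD's serde renaming; the values are illustrative):
///
/// ```ignore
/// let patch = serde_json::json!({
///     "status": {
///         "conditions": [ /* Ready condition + per-instance conditions */ ],
///         "observedGeneration": 3,
///         "instanceCount": 2,
///         "readyInstances": 2,
///         "instances": ["demo-primary-0", "demo-secondary-0"]
///     }
/// });
/// ```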
async fn update_status(
    client: &Client,
    cluster: &Bind9Cluster,
    conditions: Vec<Condition>,
    instance_count: i32,
    ready_instances: i32,
    instances: Vec<String>,
) -> Result<()> {
    let api: Api<Bind9Cluster> =
        Api::namespaced(client.clone(), &cluster.namespace().unwrap_or_default());

    // Check if status has actually changed
    let current_status = &cluster.status;
    let status_changed =
        if let Some(current) = current_status {
            // Check if counts changed
            if current.instance_count != Some(instance_count)
                || current.ready_instances != Some(ready_instances)
                || current.instances != instances
            {
                true
            } else {
                // Check if any condition changed
                if current.conditions.len() == conditions.len() {
                    // Compare each condition
                    current.conditions.iter().zip(conditions.iter()).any(
                        |(current_cond, new_cond)| {
                            current_cond.r#type != new_cond.r#type
                                || current_cond.status != new_cond.status
                                || current_cond.message != new_cond.message
                                || current_cond.reason != new_cond.reason
                        },
                    )
                } else {
                    true
                }
            }
        } else {
            // No status exists, need to update
            true
        };

    // Only update if status has changed
    if !status_changed {
        debug!(
            namespace = %cluster.namespace().unwrap_or_default(),
            name = %cluster.name_any(),
            "Status unchanged, skipping update"
        );
        return Ok(());
    }

    debug!(
        instance_count,
        ready_instances,
        instances_count = instances.len(),
        num_conditions = conditions.len(),
        "Preparing status update"
    );

    let new_status = Bind9ClusterStatus {
        conditions,
        observed_generation: cluster.metadata.generation,
        instance_count: Some(instance_count),
        ready_instances: Some(ready_instances),
        instances,
    };

    info!(
        "Updating Bind9Cluster {}/{} status: {} instances, {} ready",
        cluster.namespace().unwrap_or_default(),
        cluster.name_any(),
        instance_count,
        ready_instances
    );

    let patch = json!({ "status": new_status });
    api.patch_status(
        &cluster.name_any(),
        &PatchParams::apply("bindy-controller"),
        &Patch::Merge(&patch),
    )
    .await?;

    Ok(())
}

/// Reconcile managed `Bind9Instance` resources for a cluster
///
/// This function ensures the correct number of primary and secondary instances exist
/// based on the cluster spec. It creates missing instances, deletes excess ones, adds
/// management labels, and keeps existing instances in sync with the cluster spec.
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `cluster` - The `Bind9Cluster` resource
///
/// # Errors
///
/// Returns an error if:
/// - Failed to list existing instances
/// - Failed to create, update, or delete instances
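///
/// # Example
///
/// An illustrative naming sketch (assuming `ROLE_PRIMARY`/`ROLE_SECONDARY`
/// render as `primary`/`secondary`): for a cluster named `demo` with
/// `spec.common.primary.replicas = 2` and `spec.common.secondary.replicas = 1`,
/// reconciliation converges on instances named `demo-primary-0`,
/// `demo-primary-1`, and `demo-secondary-0`.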
#[allow(clippy::too_many_lines)]
async fn reconcile_managed_instances(client: &Client, cluster: &Bind9Cluster) -> Result<()> {
    let namespace = cluster.namespace().unwrap_or_default();
    let cluster_name = cluster.name_any();

    info!(
        "Reconciling managed instances for cluster {}/{}",
        namespace, cluster_name
    );

    // Get desired replica counts from spec
    let primary_replicas = cluster
        .spec
        .common
        .primary
        .as_ref()
        .and_then(|p| p.replicas)
        .unwrap_or(0);

    let secondary_replicas = cluster
        .spec
        .common
        .secondary
        .as_ref()
        .and_then(|s| s.replicas)
        .unwrap_or(0);

    debug!(
        "Desired replicas: {} primary, {} secondary",
        primary_replicas, secondary_replicas
    );

    if primary_replicas == 0 && secondary_replicas == 0 {
        debug!(
            "No instances requested for cluster {}/{}",
            namespace, cluster_name
        );
        return Ok(());
    }

    // List existing managed instances
    let api: Api<Bind9Instance> = Api::namespaced(client.clone(), &namespace);
    let instances = api.list(&ListParams::default()).await?;

    // Filter for managed instances of this cluster
    let managed_instances: Vec<_> = instances
        .items
        .into_iter()
        .filter(|instance| {
            // Check if instance has management labels
            instance.metadata.labels.as_ref().is_some_and(|labels| {
                labels.get(BINDY_MANAGED_BY_LABEL) == Some(&MANAGED_BY_BIND9_CLUSTER.to_string())
                    && labels.get(BINDY_CLUSTER_LABEL) == Some(&cluster_name)
            })
        })
        .collect();

    debug!(
        "Found {} managed instances for cluster {}/{}",
        managed_instances.len(),
        namespace,
        cluster_name
    );

    // Separate by role
    let existing_primary: Vec<_> = managed_instances
        .iter()
        .filter(|i| i.spec.role == ServerRole::Primary)
        .collect();

    let existing_secondary: Vec<_> = managed_instances
        .iter()
        .filter(|i| i.spec.role == ServerRole::Secondary)
        .collect();

    debug!(
        "Existing instances: {} primary, {} secondary",
        existing_primary.len(),
        existing_secondary.len()
    );

    // Create ownerReference to the Bind9Cluster
    let owner_ref = k8s_openapi::apimachinery::pkg::apis::meta::v1::OwnerReference {
        api_version: API_GROUP_VERSION.to_string(),
        kind: KIND_BIND9_CLUSTER.to_string(),
        name: cluster_name.clone(),
        uid: cluster.metadata.uid.clone().unwrap_or_default(),
        controller: Some(true),
        block_owner_deletion: Some(true),
    };

    // Handle scale-up: Create missing primary instances
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    let primaries_to_create = (primary_replicas as usize).saturating_sub(existing_primary.len());
    for i in 0..primaries_to_create {
        let index = existing_primary.len() + i;
        create_managed_instance_with_owner(
            client,
            &namespace,
            &cluster_name,
            ServerRole::Primary,
            index,
            &cluster.spec.common,
            Some(owner_ref.clone()),
        )
        .await?;
    }

    // Handle scale-down: Delete excess primary instances
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    let primaries_to_delete = existing_primary
        .len()
        .saturating_sub(primary_replicas as usize);
    if primaries_to_delete > 0 {
        // Sort by index descending to delete highest-indexed instances first
        let mut sorted_primary: Vec<_> = existing_primary.iter().collect();
        sorted_primary.sort_by_key(|instance| {
            instance
                .metadata
                .annotations
                .as_ref()
                .and_then(|a| a.get(BINDY_INSTANCE_INDEX_ANNOTATION))
                .and_then(|idx| idx.parse::<usize>().ok())
                .unwrap_or(0)
        });
        sorted_primary.reverse();

        for instance in sorted_primary.iter().take(primaries_to_delete) {
            let instance_name = instance.name_any();
            delete_managed_instance(client, &namespace, &instance_name).await?;
        }
    }

    // Handle scale-up: Create missing secondary instances
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    let secondaries_to_create =
        (secondary_replicas as usize).saturating_sub(existing_secondary.len());
    for i in 0..secondaries_to_create {
        let index = existing_secondary.len() + i;
        create_managed_instance_with_owner(
            client,
            &namespace,
            &cluster_name,
            ServerRole::Secondary,
            index,
            &cluster.spec.common,
            Some(owner_ref.clone()),
        )
        .await?;
    }

    // Handle scale-down: Delete excess secondary instances
    #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
    let secondaries_to_delete = existing_secondary
        .len()
        .saturating_sub(secondary_replicas as usize);
    if secondaries_to_delete > 0 {
        // Sort by index descending to delete highest-indexed instances first
        let mut sorted_secondary: Vec<_> = existing_secondary.iter().collect();
        sorted_secondary.sort_by_key(|instance| {
            instance
                .metadata
                .annotations
                .as_ref()
                .and_then(|a| a.get(BINDY_INSTANCE_INDEX_ANNOTATION))
                .and_then(|idx| idx.parse::<usize>().ok())
                .unwrap_or(0)
        });
        sorted_secondary.reverse();

        for instance in sorted_secondary.iter().take(secondaries_to_delete) {
            let instance_name = instance.name_any();
            delete_managed_instance(client, &namespace, &instance_name).await?;
        }
    }

    if primaries_to_create > 0
        || secondaries_to_create > 0
        || primaries_to_delete > 0
        || secondaries_to_delete > 0
    {
        info!(
            "Scaled cluster {}/{}: created {} primary, {} secondary; deleted {} primary, {} secondary",
            namespace,
            cluster_name,
            primaries_to_create,
            secondaries_to_create,
            primaries_to_delete,
            secondaries_to_delete
        );
    } else {
        debug!(
            "Cluster {}/{} already at desired scale",
            namespace, cluster_name
        );
    }

    // Update existing managed instances to match cluster spec (declarative reconciliation)
    update_existing_managed_instances(
        client,
        &namespace,
        &cluster_name,
        &cluster.spec.common,
        &managed_instances,
    )
    .await?;

    // Ensure child resources (ConfigMaps, Secrets, Services, Deployments) exist for all managed instances
    ensure_managed_instance_resources(client, cluster, &managed_instances).await?;

    Ok(())
}

/// Update existing managed instances to match the cluster's current spec.
///
/// This implements true declarative reconciliation - comparing the desired state (from cluster spec)
/// with the actual state (existing instance specs) and updating any instances that have drifted.
///
/// This ensures that when the cluster's `spec.common` changes (e.g., bindcar version, volumes,
/// config references), all managed instances are updated to reflect the new configuration.
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `namespace` - Namespace containing the instances
/// * `cluster_name` - Name of the parent cluster
/// * `common_spec` - The cluster's common spec (source of truth)
/// * `managed_instances` - List of existing managed instances to check
///
/// # Errors
///
/// Returns an error if patching instances fails
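///
/// For example, bumping `spec.common.version` on the cluster causes every
/// managed instance whose `spec.version` differs to be force-applied with the
/// new value on the next reconcile, while instance-specific fields such as
/// `rndc_secret_ref` and `storage` are preserved.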
async fn update_existing_managed_instances(
    client: &Client,
    namespace: &str,
    cluster_name: &str,
    common_spec: &crate::crd::Bind9ClusterCommonSpec,
    managed_instances: &[Bind9Instance],
) -> Result<()> {
    if managed_instances.is_empty() {
        return Ok(());
    }

    let instance_api: Api<Bind9Instance> = Api::namespaced(client.clone(), namespace);
    let mut updated_count = 0;

    for instance in managed_instances {
        let instance_name = instance.name_any();

        // Build the desired spec based on current cluster configuration
        let desired_bindcar_config = common_spec
            .global
            .as_ref()
            .and_then(|g| g.bindcar_config.clone());

        // Check if instance spec needs updating by comparing key fields
        let needs_update = instance.spec.version != common_spec.version
            || instance.spec.image != common_spec.image
            || instance.spec.config_map_refs != common_spec.config_map_refs
            || instance.spec.volumes != common_spec.volumes
            || instance.spec.volume_mounts != common_spec.volume_mounts
            || instance.spec.bindcar_config != desired_bindcar_config;

        if needs_update {
            debug!(
                "Instance {}/{} spec differs from cluster spec, updating",
                namespace, instance_name
            );

            // Build updated instance spec - preserve instance-specific fields, update cluster-inherited fields
            let updated_spec = Bind9InstanceSpec {
                cluster_ref: instance.spec.cluster_ref.clone(),
                role: instance.spec.role.clone(),
                replicas: instance.spec.replicas, // Preserve instance replicas (always 1 for managed)
                version: common_spec.version.clone(),
                image: common_spec.image.clone(),
                config_map_refs: common_spec.config_map_refs.clone(),
                config: None, // Managed instances inherit from cluster
                primary_servers: instance.spec.primary_servers.clone(), // Preserve if set
                volumes: common_spec.volumes.clone(),
                volume_mounts: common_spec.volume_mounts.clone(),
                rndc_secret_ref: instance.spec.rndc_secret_ref.clone(), // Preserve if set
                storage: instance.spec.storage.clone(),                 // Preserve if set
                bindcar_config: desired_bindcar_config,
            };

            // Use server-side apply to update the instance spec
            let patch = serde_json::json!({
                "apiVersion": API_GROUP_VERSION,
                "kind": KIND_BIND9_INSTANCE,
                "metadata": {
                    "name": instance_name,
                    "namespace": namespace,
                },
                "spec": updated_spec,
            });

            match instance_api
                .patch(
                    &instance_name,
                    &PatchParams::apply("bindy-controller").force(),
                    &Patch::Apply(&patch),
                )
                .await
            {
                Ok(_) => {
                    info!(
                        "Updated managed instance {}/{} to match cluster spec",
                        namespace, instance_name
                    );
                    updated_count += 1;
                }
                Err(e) => {
                    error!(
                        "Failed to update managed instance {}/{}: {}",
                        namespace, instance_name, e
                    );
                    return Err(e.into());
                }
            }
        } else {
            debug!(
                "Instance {}/{} spec matches cluster spec, no update needed",
                namespace, instance_name
            );
        }
    }

    if updated_count > 0 {
        info!(
            "Updated {} managed instances in cluster {}/{} to match current spec",
            updated_count, namespace, cluster_name
        );
    }

    Ok(())
}

/// Ensure child resources exist for all managed instances
///
/// This function verifies that all Kubernetes resources (`ConfigMap`, `Secret`, `Service`, `Deployment`)
/// exist for each managed instance. If any resource is missing, it triggers reconciliation
/// by updating the instance's annotations to force the `Bind9Instance` controller to recreate them.
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `cluster` - The parent `Bind9Cluster`
/// * `managed_instances` - List of managed `Bind9Instance` resources
///
/// # Errors
///
/// Returns an error if patching an instance to trigger reconciliation fails
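///
/// For an instance named `demo-primary-0`, the checked resources are the
/// `demo-primary-0-config` ConfigMap, the `demo-primary-0-rndc-key` Secret,
/// and a Service and Deployment each named `demo-primary-0`.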
async fn ensure_managed_instance_resources(
    client: &Client,
    cluster: &Bind9Cluster,
    managed_instances: &[Bind9Instance],
) -> Result<()> {
    let namespace = cluster.namespace().unwrap_or_default();
    let cluster_name = cluster.name_any();

    if managed_instances.is_empty() {
        return Ok(());
    }

    debug!(
        "Ensuring child resources exist for {} managed instances in cluster {}/{}",
        managed_instances.len(),
        namespace,
        cluster_name
    );

    let configmap_api: Api<ConfigMap> = Api::namespaced(client.clone(), &namespace);
    let secret_api: Api<Secret> = Api::namespaced(client.clone(), &namespace);
    let service_api: Api<Service> = Api::namespaced(client.clone(), &namespace);
    let deployment_api: Api<Deployment> = Api::namespaced(client.clone(), &namespace);
    let instance_api: Api<Bind9Instance> = Api::namespaced(client.clone(), &namespace);

    for instance in managed_instances {
        let instance_name = instance.name_any();
        let mut missing_resources = Vec::new();

        // Check ConfigMap
        let configmap_name = format!("{instance_name}-config");
        if configmap_api.get(&configmap_name).await.is_err() {
            missing_resources.push("ConfigMap");
        }

        // Check RNDC Secret
        let secret_name = format!("{instance_name}-rndc-key");
        if secret_api.get(&secret_name).await.is_err() {
            missing_resources.push("Secret");
        }

        // Check Service
        if service_api.get(&instance_name).await.is_err() {
            missing_resources.push("Service");
        }

        // Check Deployment
        if deployment_api.get(&instance_name).await.is_err() {
            missing_resources.push("Deployment");
        }

        // If any resources are missing, trigger instance reconciliation
        if missing_resources.is_empty() {
            debug!(
                "All child resources exist for managed instance {}/{}",
                namespace, instance_name
            );
        } else {
            warn!(
                "Missing resources for managed instance {}/{}: {}. Triggering reconciliation.",
                namespace,
                instance_name,
                missing_resources.join(", ")
            );

            // Force reconciliation by updating an annotation
            let patch = json!({
                "metadata": {
                    "annotations": {
                        BINDY_RECONCILE_TRIGGER_ANNOTATION: Utc::now().to_rfc3339()
                    }
                }
            });

            instance_api
                .patch(
                    &instance_name,
                    &PatchParams::apply("bindy-cluster-controller"),
                    &Patch::Merge(&patch),
                )
                .await?;

            info!(
                "Triggered reconciliation for instance {}/{} to recreate: {}",
                namespace,
                instance_name,
                missing_resources.join(", ")
            );
        }
    }

    Ok(())
}

/// Create a managed `Bind9Instance` resource
///
/// This function is public to allow reuse by the `ClusterBind9Provider` reconciler.
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `namespace` - Namespace for the instance
/// * `cluster_name` - Name of the cluster (namespace-scoped or global)
/// * `role` - Role of the instance (Primary or Secondary)
/// * `index` - Index of this instance within its role
/// * `common_spec` - The cluster's common specification
/// * `_is_global` - Whether this is for a global cluster (currently unused)
///
/// # Errors
///
/// Returns an error if instance creation fails
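///
/// # Example
///
/// An illustrative call (assuming `ROLE_PRIMARY` renders as `primary`, this
/// creates or re-applies an instance named `demo-primary-0`):
///
/// ```ignore
/// create_managed_instance(&client, "dns", "demo", ServerRole::Primary, 0, &common_spec, false)
///     .await?;
/// ```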
#[allow(clippy::too_many_lines, clippy::too_many_arguments)]
pub async fn create_managed_instance(
    client: &Client,
    namespace: &str,
    cluster_name: &str,
    role: ServerRole,
    index: usize,
    common_spec: &crate::crd::Bind9ClusterCommonSpec,
    _is_global: bool,
) -> Result<()> {
    create_managed_instance_with_owner(
        client,
        namespace,
        cluster_name,
        role,
        index,
        common_spec,
        None, // No owner reference - for backward compatibility
    )
    .await
}

/// Create a managed `Bind9Instance` with optional ownerReference.
///
/// This is the internal implementation that supports setting ownerReferences.
/// Use `create_managed_instance()` for backward compatibility without ownerReferences.
///
/// # Arguments
///
/// * `owner_ref` - Optional ownerReference to the parent `Bind9Cluster`
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
async fn create_managed_instance_with_owner(
    client: &Client,
    namespace: &str,
    cluster_name: &str,
    role: ServerRole,
    index: usize,
    common_spec: &crate::crd::Bind9ClusterCommonSpec,
    owner_ref: Option<k8s_openapi::apimachinery::pkg::apis::meta::v1::OwnerReference>,
) -> Result<()> {
    let role_str = match role {
        ServerRole::Primary => ROLE_PRIMARY,
        ServerRole::Secondary => ROLE_SECONDARY,
    };

    let instance_name = format!("{cluster_name}-{role_str}-{index}");

    info!(
        "Creating managed instance {}/{} for cluster {} (role: {:?}, index: {})",
        namespace, instance_name, cluster_name, role, index
    );

    // Create labels
    let mut labels = BTreeMap::new();
    labels.insert(
        BINDY_MANAGED_BY_LABEL.to_string(),
        MANAGED_BY_BIND9_CLUSTER.to_string(),
    );
    labels.insert(BINDY_CLUSTER_LABEL.to_string(), cluster_name.to_string());
    labels.insert(BINDY_ROLE_LABEL.to_string(), role_str.to_string());
    labels.insert(K8S_PART_OF.to_string(), PART_OF_BINDY.to_string());

    // Create annotations
    let mut annotations = BTreeMap::new();
    annotations.insert(
        BINDY_INSTANCE_INDEX_ANNOTATION.to_string(),
        index.to_string(),
    );

    // Build instance spec - copy configuration from cluster
    let instance_spec = Bind9InstanceSpec {
        cluster_ref: cluster_name.to_string(),
        role,
        replicas: Some(1), // Each managed instance has 1 replica
        version: common_spec.version.clone(),
        image: common_spec.image.clone(),
        config_map_refs: common_spec.config_map_refs.clone(),
        config: None,          // Inherit from cluster
        primary_servers: None, // TODO: Could populate for secondaries
        volumes: common_spec.volumes.clone(),
        volume_mounts: common_spec.volume_mounts.clone(),
        rndc_secret_ref: None, // Inherit from cluster/role config
        storage: None,         // Use default (emptyDir)
        bindcar_config: common_spec
            .global
            .as_ref()
            .and_then(|g| g.bindcar_config.clone()),
    };

    let instance = Bind9Instance {
        metadata: ObjectMeta {
            name: Some(instance_name.clone()),
            namespace: Some(namespace.to_string()),
            labels: Some(labels),
            annotations: Some(annotations),
            owner_references: owner_ref.map(|r| vec![r]),
            ..Default::default()
        },
        spec: instance_spec,
        status: None,
    };

    let api: Api<Bind9Instance> = Api::namespaced(client.clone(), namespace);

    match api.create(&PostParams::default(), &instance).await {
        Ok(_) => {
            info!(
                "Successfully created managed instance {}/{}",
                namespace, instance_name
            );
            Ok(())
        }
        Err(e) => {
            // If already exists, patch it to ensure spec is up to date
            if e.to_string().contains("AlreadyExists") {
                debug!(
                    "Managed instance {}/{} already exists, patching with updated spec",
                    namespace, instance_name
                );

                // Build a complete patch object for server-side apply
                let patch = serde_json::json!({
                    "apiVersion": API_GROUP_VERSION,
                    "kind": KIND_BIND9_INSTANCE,
                    "metadata": {
                        "name": instance_name,
                        "namespace": namespace,
                        "labels": {
                            BINDY_MANAGED_BY_LABEL: MANAGED_BY_BIND9_CLUSTER,
                            BINDY_CLUSTER_LABEL: cluster_name,
                            BINDY_ROLE_LABEL: role_str,
                            K8S_PART_OF: PART_OF_BINDY,
                        },
                        "annotations": {
                            BINDY_INSTANCE_INDEX_ANNOTATION: index.to_string(),
                        },
                        "ownerReferences": instance.metadata.owner_references,
                    },
                    "spec": instance.spec,
                });

                // Apply the patch to update the spec, labels, annotations, and owner references
                match api
                    .patch(
                        &instance_name,
                        &PatchParams::apply("bindy-controller").force(),
                        &Patch::Apply(&patch),
                    )
                    .await
                {
                    Ok(_) => {
                        info!(
                            "Successfully patched managed instance {}/{} with updated spec",
                            namespace, instance_name
                        );
                        Ok(())
                    }
                    Err(patch_err) => {
                        error!(
                            "Failed to patch managed instance {}/{}: {}",
                            namespace, instance_name, patch_err
                        );
                        Err(patch_err.into())
                    }
                }
            } else {
                error!(
                    "Failed to create managed instance {}/{}: {}",
                    namespace, instance_name, e
                );
                Err(e.into())
            }
        }
    }
}

/// Create or update the shared cluster-level `ConfigMap`
///
/// This `ConfigMap` contains BIND9 configuration that is shared across all instances
/// in the cluster. It is created from `spec.global` configuration.
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `cluster` - The `Bind9Cluster` resource
///
/// # Errors
///
/// Returns an error if:
/// - Failed to create or update the `ConfigMap`
/// - Kubernetes API operations fail
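///
/// The shared `ConfigMap` is named `{cluster-name}-config`. Creation is
/// skipped when the cluster references custom ConfigMaps via
/// `spec.common.config_map_refs`.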
async fn create_or_update_cluster_configmap(client: &Client, cluster: &Bind9Cluster) -> Result<()> {
    use crate::bind9_resources::build_cluster_configmap;

    let namespace = cluster.namespace().unwrap_or_default();
    let name = cluster.name_any();

    // Check if custom ConfigMaps are referenced at the cluster level
    if let Some(refs) = &cluster.spec.common.config_map_refs {
        if refs.named_conf.is_some() || refs.named_conf_options.is_some() {
            info!(
                "Cluster {}/{} uses custom ConfigMaps, skipping cluster ConfigMap creation",
                namespace, name
            );
            return Ok(());
        }
    }

    info!(
        "Creating/updating shared ConfigMap for cluster {}/{}",
        namespace, name
    );

    // Build the cluster ConfigMap
    let configmap = build_cluster_configmap(&name, &namespace, cluster)?;

    let cm_api: Api<ConfigMap> = Api::namespaced(client.clone(), &namespace);
    let cm_name = format!("{name}-config");

    if (cm_api.get(&cm_name).await).is_ok() {
        // ConfigMap exists, update it
        info!("Updating cluster ConfigMap {}/{}", namespace, cm_name);
        cm_api
            .replace(&cm_name, &PostParams::default(), &configmap)
            .await?;
    } else {
        // ConfigMap doesn't exist, create it
        info!("Creating cluster ConfigMap {}/{}", namespace, cm_name);
        cm_api.create(&PostParams::default(), &configmap).await?;
    }

    Ok(())
}

/// Delete a single managed `Bind9Instance` resource
///
/// This function is public to allow reuse by the `ClusterBind9Provider` reconciler.
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `namespace` - Namespace of the instance
/// * `instance_name` - Name of the instance to delete
///
/// # Errors
///
/// Returns an error if deletion fails (except for `NotFound` errors, which are treated as success)
pub async fn delete_managed_instance(
    client: &Client,
    namespace: &str,
    instance_name: &str,
) -> Result<()> {
    let api: Api<Bind9Instance> = Api::namespaced(client.clone(), namespace);

    match api.delete(instance_name, &DeleteParams::default()).await {
        Ok(_) => {
            info!(
                "Successfully deleted managed instance {}/{}",
                namespace, instance_name
            );
            Ok(())
        }
        Err(e) if e.to_string().contains("NotFound") => {
            debug!(
                "Managed instance {}/{} already deleted",
                namespace, instance_name
            );
            Ok(())
        }
        Err(e) => {
            error!(
                "Failed to delete managed instance {}/{}: {}",
                namespace, instance_name, e
            );
            Err(e.into())
        }
    }
}

/// Delete all `Bind9Instance` resources that reference the given cluster
///
/// # Arguments
///
/// * `client` - Kubernetes API client
/// * `namespace` - Namespace containing the instances
/// * `cluster_name` - Name of the cluster being deleted
///
/// # Errors
///
/// Returns an error if:
/// - Failed to list `Bind9Instance` resources
/// - Failed to delete any `Bind9Instance` resource
async fn delete_cluster_instances(
    client: &Client,
    namespace: &str,
    cluster_name: &str,
) -> Result<()> {
    let api: Api<Bind9Instance> = Api::namespaced(client.clone(), namespace);

    info!(
        "Finding all Bind9Instance resources for cluster {}/{}",
        namespace, cluster_name
    );

    // List all instances in the namespace
    let instances = api.list(&ListParams::default()).await?;

    // Filter instances that reference this cluster
    let cluster_instances: Vec<_> = instances
        .items
        .into_iter()
        .filter(|instance| instance.spec.cluster_ref == cluster_name)
        .collect();

    if cluster_instances.is_empty() {
        info!(
            "No Bind9Instance resources found for cluster {}/{}",
            namespace, cluster_name
        );
        return Ok(());
    }

    info!(
        "Found {} Bind9Instance resources for cluster {}/{}, deleting...",
        cluster_instances.len(),
        namespace,
        cluster_name
    );

    // Delete each instance
    for instance in cluster_instances {
        let instance_name = instance.name_any();
        info!(
            "Deleting Bind9Instance {}/{} (clusterRef: {})",
            namespace, instance_name, cluster_name
        );

        match api.delete(&instance_name, &DeleteParams::default()).await {
            Ok(_) => {
                info!(
                    "Successfully deleted Bind9Instance {}/{}",
                    namespace, instance_name
                );
            }
            Err(e) => {
                // If the resource is already deleted, treat it as success
                if e.to_string().contains("NotFound") {
                    warn!(
                        "Bind9Instance {}/{} already deleted",
                        namespace, instance_name
                    );
                } else {
                    error!(
                        "Failed to delete Bind9Instance {}/{}: {}",
                        namespace, instance_name, e
                    );
                    return Err(e.into());
                }
            }
        }
    }

    info!(
        "Successfully deleted all Bind9Instance resources for cluster {}/{}",
        namespace, cluster_name
    );

    Ok(())
}

/// Delete handler for `Bind9Cluster` resources (cleanup logic)
///
/// This function is no longer used, as deletion is handled by the finalizer in `reconcile_bind9cluster`.
/// Kept for backward compatibility.
///
/// # Errors
///
/// This function currently never returns an error, but returns `Result` for API consistency.
pub async fn delete_bind9cluster(_client: Client, _cluster: Bind9Cluster) -> Result<()> {
    // Deletion is now handled by the finalizer in reconcile_bind9cluster
    Ok(())
}
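
#[cfg(test)]
mod tests {
    // A minimal test sketch (not part of the original file): it exercises the
    // pure `calculate_cluster_status` helper on the no-instances path, using
    // only types and constants already imported by this module.
    use super::*;

    #[test]
    fn empty_cluster_reports_no_children() {
        let instances: Vec<Bind9Instance> = Vec::new();
        let (count, ready, names, conditions) =
            calculate_cluster_status(&instances, "dns", "demo");

        assert_eq!(count, 0);
        assert_eq!(ready, 0);
        assert!(names.is_empty());

        // Only the encompassing Ready condition is emitted.
        assert_eq!(conditions.len(), 1);
        assert_eq!(conditions[0].r#type, CONDITION_TYPE_READY);
        assert_eq!(conditions[0].status, "False");
        assert_eq!(conditions[0].reason.as_deref(), Some(REASON_NO_CHILDREN));
    }
}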