bindy/reconcilers/
clusterbind9provider.rs

1// Copyright (c) 2025 Erick Bourgeois, firestoned
2// SPDX-License-Identifier: MIT
3
4//! BIND9 global cluster (cluster-scoped) reconciliation logic.
5//!
6//! This module handles the lifecycle of cluster-scoped BIND9 cluster resources.
7//! It manages `Bind9Instance` resources across all namespaces that reference this
8//! global cluster.
9//!
10//! The key difference from namespace-scoped `Bind9Cluster` is that:
11//! - `ClusterBind9Provider` resources are cluster-scoped (no namespace)
12//! - Instances can be created in any namespace and reference the global cluster
13//! - The reconciler must list instances across all namespaces
14
15use crate::constants::{API_GROUP_VERSION, KIND_BIND9_CLUSTER, KIND_CLUSTER_BIND9_PROVIDER};
16use crate::crd::{
17    Bind9Cluster, Bind9ClusterStatus, Bind9Instance, ClusterBind9Provider, Condition,
18};
19use crate::labels::FINALIZER_BIND9_CLUSTER;
20use crate::reconcilers::finalizers::{
21    ensure_cluster_finalizer, handle_cluster_deletion, FinalizerCleanup,
22};
23use crate::status_reasons::{
24    CONDITION_TYPE_READY, REASON_ALL_READY, REASON_NOT_READY, REASON_NO_CHILDREN,
25    REASON_PARTIALLY_READY,
26};
27use anyhow::Result;
28use chrono::Utc;
29use kube::{
30    api::{ListParams, Patch, PatchParams},
31    client::Client,
32    Api, ResourceExt,
33};
34use serde_json::json;
35use tracing::{debug, error, info, warn};
36
37/// Implement finalizer cleanup for `ClusterBind9Provider`.
38///
39/// This handles deletion of all managed `Bind9Cluster` resources when the
40/// global cluster is deleted.
41#[async_trait::async_trait]
42impl FinalizerCleanup for ClusterBind9Provider {
43    async fn cleanup(&self, client: &Client) -> Result<()> {
44        use crate::labels::{
45            BINDY_CLUSTER_LABEL, BINDY_MANAGED_BY_LABEL, MANAGED_BY_CLUSTER_BIND9_PROVIDER,
46        };
47        use kube::api::DeleteParams;
48
49        let name = self.name_any();
50
51        // Step 1: Delete all managed Bind9Cluster resources
52        info!(
53            "Deleting managed Bind9Cluster resources for global cluster {}",
54            name
55        );
56
57        let clusters_api: Api<Bind9Cluster> = Api::all(client.clone());
58        let all_clusters = clusters_api.list(&ListParams::default()).await?;
59
60        // Filter clusters managed by this global cluster
61        let managed_clusters: Vec<_> = all_clusters
62            .items
63            .iter()
64            .filter(|c| {
65                c.metadata.labels.as_ref().is_some_and(|labels| {
66                    labels.get(BINDY_MANAGED_BY_LABEL)
67                        == Some(&MANAGED_BY_CLUSTER_BIND9_PROVIDER.to_string())
68                        && labels.get(BINDY_CLUSTER_LABEL) == Some(&name.clone())
69                })
70            })
71            .collect();
72
73        if !managed_clusters.is_empty() {
74            info!(
75                "Found {} managed Bind9Cluster resources to delete for global cluster {}",
76                managed_clusters.len(),
77                name
78            );
79
80            for managed_cluster in managed_clusters {
81                let cluster_name = managed_cluster.name_any();
82                let cluster_namespace = managed_cluster.namespace().unwrap_or_default();
83
84                info!(
85                    "Deleting managed Bind9Cluster {}/{} for global cluster {}",
86                    cluster_namespace, cluster_name, name
87                );
88
89                let api: Api<Bind9Cluster> = Api::namespaced(client.clone(), &cluster_namespace);
90                match api.delete(&cluster_name, &DeleteParams::default()).await {
91                    Ok(_) => {
92                        info!(
93                            "Successfully deleted Bind9Cluster {}/{}",
94                            cluster_namespace, cluster_name
95                        );
96                    }
97                    Err(e) => {
98                        // If already deleted or not found, that's fine
99                        if e.to_string().contains("NotFound") {
100                            debug!(
101                                "Bind9Cluster {}/{} already deleted",
102                                cluster_namespace, cluster_name
103                            );
104                        } else {
105                            error!(
106                                "Failed to delete Bind9Cluster {}/{}: {}",
107                                cluster_namespace, cluster_name, e
108                            );
109                            return Err(e.into());
110                        }
111                    }
112                }
113            }
114        }
115
116        // Step 2: Check for orphaned Bind9Instance resources (warn only, don't delete)
117        // Note: Instances will be cleaned up by their parent Bind9Cluster's finalizer
118        let instances_api: Api<Bind9Instance> = Api::all(client.clone());
119        let instances = instances_api.list(&ListParams::default()).await?;
120
121        let referencing_instances: Vec<_> = instances
122            .items
123            .iter()
124            .filter(|inst| inst.spec.cluster_ref == name)
125            .collect();
126
127        if !referencing_instances.is_empty() {
128            warn!(
129                "ClusterBind9Provider {} still has {} referencing instances. \
130                These will be cleaned up by their parent Bind9Cluster finalizers.",
131                name,
132                referencing_instances.len()
133            );
134        }
135
136        Ok(())
137    }
138}
139
140/// Reconciles a cluster-scoped `ClusterBind9Provider` resource.
141///
142/// This function:
143/// 1. Checks if the cluster is being deleted and handles cleanup
144/// 2. Adds finalizer if not present
145/// 3. Lists all `Bind9Instance` resources across all namespaces that reference this global cluster
146/// 4. Updates cluster status based on instance health
147///
148/// # Arguments
149///
150/// * `client` - Kubernetes API client
151/// * `cluster` - The `ClusterBind9Provider` resource to reconcile
152///
153/// # Returns
154///
155/// * `Ok(())` - If reconciliation succeeded
156/// * `Err(_)` - If status update failed
157///
158/// # Errors
159///
160/// Returns an error if Kubernetes API operations fail or status update fails.
161pub async fn reconcile_clusterbind9provider(
162    client: Client,
163    cluster: ClusterBind9Provider,
164) -> Result<()> {
165    let name = cluster.name_any();
166
167    info!("Reconciling ClusterBind9Provider: {}", name);
168    debug!(
169        name = %name,
170        generation = ?cluster.metadata.generation,
171        "Starting ClusterBind9Provider reconciliation (cluster-scoped)"
172    );
173
174    // Handle deletion if cluster is being deleted
175    if cluster.metadata.deletion_timestamp.is_some() {
176        return handle_cluster_deletion(&client, &cluster, FINALIZER_BIND9_CLUSTER).await;
177    }
178
179    // Ensure finalizer is present
180    ensure_cluster_finalizer(&client, &cluster, FINALIZER_BIND9_CLUSTER).await?;
181
182    // Check if spec has changed using the standard generation check
183    let current_generation = cluster.metadata.generation;
184    let observed_generation = cluster.status.as_ref().and_then(|s| s.observed_generation);
185
186    // Only reconcile resources if spec changed or we haven't processed this resource yet
187    if !crate::reconcilers::should_reconcile(current_generation, observed_generation) {
188        debug!(
189            "Spec unchanged (generation={:?}), skipping resource reconciliation",
190            current_generation
191        );
192        // Update status from current instance states (only patches if status changed)
193        update_cluster_status(&client, &cluster).await?;
194        return Ok(());
195    }
196
197    debug!(
198        "Reconciliation needed: current_generation={:?}, observed_generation={:?}",
199        current_generation, observed_generation
200    );
201
202    // Reconcile namespace-scoped Bind9Cluster resources
203    // The Bind9Cluster reconciler will handle creating Bind9Instance resources
204    // This ensures proper delegation: GlobalCluster → Cluster → Instance
205    reconcile_namespace_clusters(&client, &cluster).await?;
206
207    // Update cluster status based on instances across all namespaces
208    update_cluster_status(&client, &cluster).await?;
209
210    Ok(())
211}
212
213/// Reconciles namespace-scoped `Bind9Cluster` resources for this global cluster.
214///
215/// This function creates or updates a namespace-scoped `Bind9Cluster` resource in each
216/// namespace where this global cluster has instances. The namespace-scoped cluster
217/// will then create the `ConfigMap` that instances need.
218///
219/// This delegation pattern ensures:
220/// - `ConfigMaps` exist before instances try to mount them
221/// - The `Bind9Cluster` reconciler handles `ConfigMap` creation logic
222/// - No duplication of `ConfigMap` creation code
223///
224/// # Errors
225///
226/// Returns an error if listing instances or creating/updating clusters fails.
227#[allow(clippy::too_many_lines)]
228async fn reconcile_namespace_clusters(
229    client: &Client,
230    cluster_provider: &ClusterBind9Provider,
231) -> Result<()> {
232    use crate::crd::{Bind9Cluster, Bind9ClusterSpec};
233    use crate::labels::{
234        BINDY_CLUSTER_LABEL, BINDY_MANAGED_BY_LABEL, MANAGED_BY_CLUSTER_BIND9_PROVIDER,
235    };
236    use kube::api::{ListParams, PostParams};
237    use std::collections::{BTreeMap, HashSet};
238
239    let cluster_provider_name = cluster_provider.name_any();
240
241    // Get target namespace from spec or default to operator's namespace
242    let target_namespace = cluster_provider.spec.namespace.as_ref().map_or_else(
243        || std::env::var("POD_NAMESPACE").unwrap_or_else(|_| "dns-system".to_string()),
244        std::clone::Clone::clone,
245    );
246
247    debug!(
248        "Reconciling namespace-scoped Bind9Cluster for global cluster {} in namespace {}",
249        cluster_provider_name, target_namespace
250    );
251
252    // List all instances across all namespaces that reference this global cluster
253    let instances_api: Api<Bind9Instance> = Api::all(client.clone());
254    let all_instances = instances_api.list(&ListParams::default()).await?;
255
256    // Find unique namespaces that have instances referencing this global cluster
257    let namespaces: HashSet<String> = all_instances
258        .items
259        .iter()
260        .filter(|inst| inst.spec.cluster_ref == cluster_provider_name)
261        .filter_map(kube::ResourceExt::namespace)
262        .collect();
263
264    // If no instances reference this global cluster, still create cluster in target namespace
265    let namespaces_to_reconcile: HashSet<String> = if namespaces.is_empty() {
266        let mut set = HashSet::new();
267        set.insert(target_namespace);
268        set
269    } else {
270        namespaces
271    };
272
273    debug!(
274        "Found {} namespace(s) needing Bind9Cluster for global cluster {}",
275        namespaces_to_reconcile.len(),
276        cluster_provider_name
277    );
278
279    // For each namespace, create or update a namespace-scoped Bind9Cluster
280    for namespace in namespaces_to_reconcile {
281        // Use the global cluster name directly (don't append "-cluster")
282        let cluster_name = cluster_provider_name.clone();
283
284        info!(
285            "Creating/updating Bind9Cluster {}/{} for global cluster {}",
286            namespace, cluster_name, cluster_provider_name
287        );
288
289        // Create labels to mark this as managed by the global cluster
290        let mut labels = BTreeMap::new();
291        labels.insert(
292            BINDY_MANAGED_BY_LABEL.to_string(),
293            MANAGED_BY_CLUSTER_BIND9_PROVIDER.to_string(),
294        );
295        labels.insert(
296            BINDY_CLUSTER_LABEL.to_string(),
297            cluster_provider_name.clone(),
298        );
299
300        // Create ownerReference to global cluster (cluster-scoped can own namespace-scoped)
301        let owner_ref = k8s_openapi::apimachinery::pkg::apis::meta::v1::OwnerReference {
302            api_version: API_GROUP_VERSION.to_string(),
303            kind: KIND_CLUSTER_BIND9_PROVIDER.to_string(),
304            name: cluster_provider_name.clone(),
305            uid: cluster_provider.metadata.uid.clone().unwrap_or_default(),
306            controller: Some(true),
307            block_owner_deletion: Some(true),
308        };
309
310        // Build the Bind9Cluster spec by cloning the global cluster's common spec
311        let cluster_spec = Bind9ClusterSpec {
312            common: cluster_provider.spec.common.clone(),
313        };
314
315        let cluster = Bind9Cluster {
316            metadata: kube::api::ObjectMeta {
317                name: Some(cluster_name.clone()),
318                namespace: Some(namespace.clone()),
319                labels: Some(labels),
320                owner_references: Some(vec![owner_ref]),
321                ..Default::default()
322            },
323            spec: cluster_spec,
324            status: None,
325        };
326
327        let api: Api<Bind9Cluster> = Api::namespaced(client.clone(), &namespace);
328
329        // Try to create the Bind9Cluster
330        match api.create(&PostParams::default(), &cluster).await {
331            Ok(_) => {
332                info!(
333                    "Successfully created Bind9Cluster {}/{}",
334                    namespace, cluster_name
335                );
336            }
337            Err(e) => {
338                // If already exists, PATCH it to ensure spec is up to date
339                if e.to_string().contains("AlreadyExists") {
340                    debug!(
341                        "Bind9Cluster {}/{} already exists, patching with updated spec",
342                        namespace, cluster_name
343                    );
344
345                    // Build a complete patch object for server-side apply
346                    let patch = serde_json::json!({
347                        "apiVersion": API_GROUP_VERSION,
348                        "kind": KIND_BIND9_CLUSTER,
349                        "metadata": {
350                            "name": cluster_name,
351                            "namespace": namespace,
352                            "ownerReferences": cluster.metadata.owner_references,
353                        },
354                        "spec": cluster.spec,
355                    });
356
357                    // Apply the patch to update the spec
358                    match api
359                        .patch(
360                            &cluster_name,
361                            &PatchParams::apply("bindy-controller").force(),
362                            &Patch::Apply(&patch),
363                        )
364                        .await
365                    {
366                        Ok(_) => {
367                            info!(
368                                "Successfully patched Bind9Cluster {}/{} with updated spec",
369                                namespace, cluster_name
370                            );
371                        }
372                        Err(patch_err) => {
373                            warn!(
374                                "Failed to patch Bind9Cluster {}/{}: {}",
375                                namespace, cluster_name, patch_err
376                            );
377                            return Err(patch_err.into());
378                        }
379                    }
380                } else {
381                    warn!(
382                        "Failed to create Bind9Cluster {}/{}: {}",
383                        namespace, cluster_name, e
384                    );
385                    return Err(e.into());
386                }
387            }
388        }
389    }
390
391    Ok(())
392}
393
394/// Updates the global cluster status based on instances across all namespaces.
395///
396/// # Errors
397///
398/// Returns an error if status update fails.
399async fn update_cluster_status(client: &Client, cluster: &ClusterBind9Provider) -> Result<()> {
400    let name = cluster.name_any();
401
402    // List all Bind9Instance resources across all namespaces
403    let instances_api: Api<Bind9Instance> = Api::all(client.clone());
404    let lp = ListParams::default();
405    let all_instances = instances_api.list(&lp).await?;
406
407    // Filter instances that reference this global cluster
408    let instances: Vec<_> = all_instances
409        .items
410        .into_iter()
411        .filter(|inst| inst.spec.cluster_ref == name)
412        .collect();
413
414    debug!(
415        "Found {} instances referencing ClusterBind9Provider {}",
416        instances.len(),
417        name
418    );
419
420    // Calculate cluster status based on instances
421    let new_status = calculate_cluster_status(&instances, cluster.metadata.generation);
422
423    // Check if status has actually changed before patching
424    let status_changed = if let Some(current_status) = &cluster.status {
425        // Check if instance count or ready count changed
426        if current_status.instance_count != new_status.instance_count
427            || current_status.ready_instances != new_status.ready_instances
428        {
429            true
430        } else if let Some(current_condition) = current_status.conditions.first() {
431            // Check if condition changed
432            let new_condition = new_status.conditions.first();
433            match new_condition {
434                Some(new_cond) => {
435                    current_condition.r#type != new_cond.r#type
436                        || current_condition.status != new_cond.status
437                        || current_condition.message != new_cond.message
438                }
439                None => true, // New status has no condition, definitely changed
440            }
441        } else {
442            // Current status has no condition but new status might
443            !new_status.conditions.is_empty()
444        }
445    } else {
446        // No current status, definitely need to update
447        true
448    };
449
450    // Only update if status has changed
451    if !status_changed {
452        debug!(
453            "Status unchanged for ClusterBind9Provider {}, skipping patch",
454            name
455        );
456        return Ok(());
457    }
458
459    // Update status
460    let api: Api<ClusterBind9Provider> = Api::all(client.clone());
461    let status_patch = json!({
462        "status": new_status
463    });
464
465    api.patch_status(&name, &PatchParams::default(), &Patch::Merge(&status_patch))
466        .await?;
467
468    debug!("Updated status for ClusterBind9Provider: {}", name);
469    Ok(())
470}
471
472/// Calculates the cluster status based on instance states.
473///
474/// # Arguments
475///
476/// * `instances` - List of instances belonging to this cluster
477/// * `generation` - Current generation of the cluster resource
478///
479/// # Returns
480///
481/// A `Bind9ClusterStatus` with calculated conditions and instance list
482#[must_use]
483pub fn calculate_cluster_status(
484    instances: &[Bind9Instance],
485    generation: Option<i64>,
486) -> Bind9ClusterStatus {
487    let now = Utc::now();
488
489    // Count ready instances
490    let ready_instances = instances
491        .iter()
492        .filter(|inst| {
493            inst.status
494                .as_ref()
495                .and_then(|s| s.conditions.iter().find(|c| c.r#type == "Ready"))
496                .is_some_and(|c| c.status == "True")
497        })
498        .count();
499
500    let total_instances = instances.len();
501
502    // Determine cluster ready condition using standard reasons
503    let (status, reason, message) = if total_instances == 0 {
504        (
505            "False",
506            REASON_NO_CHILDREN,
507            "No instances found for this cluster".to_string(),
508        )
509    } else if ready_instances == total_instances {
510        (
511            "True",
512            REASON_ALL_READY,
513            format!("All {total_instances} instances are ready"),
514        )
515    } else if ready_instances > 0 {
516        (
517            "False",
518            REASON_PARTIALLY_READY,
519            format!("{ready_instances}/{total_instances} instances are ready"),
520        )
521    } else {
522        (
523            "False",
524            REASON_NOT_READY,
525            "No instances are ready".to_string(),
526        )
527    };
528
529    // Collect instance names (with namespace for global clusters)
530    let instance_names: Vec<String> = instances
531        .iter()
532        .map(|inst| {
533            let name = inst.name_any();
534            let ns = inst.namespace().unwrap_or_default();
535            format!("{ns}/{name}")
536        })
537        .collect();
538
539    Bind9ClusterStatus {
540        conditions: vec![Condition {
541            r#type: CONDITION_TYPE_READY.to_string(),
542            status: status.to_string(),
543            reason: Some(reason.to_string()),
544            message: Some(message.clone()),
545            last_transition_time: Some(now.to_rfc3339()),
546        }],
547        instances: instance_names,
548        observed_generation: generation,
549        #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
550        instance_count: Some(total_instances as i32),
551        #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
552        ready_instances: Some(ready_instances as i32),
553    }
554}
555
556/// Deletes a cluster-scoped `ClusterBind9Provider` resource.
557///
558/// This is called when the cluster resource is explicitly deleted by the user.
559/// It delegates to the reconciler which handles the deletion via finalizers.
560///
561/// # Arguments
562///
563/// * `client` - Kubernetes API client
564/// * `cluster` - The `ClusterBind9Provider` resource being deleted
565///
566/// # Returns
567///
568/// * `Ok(())` - If deletion handling succeeded
569/// * `Err(_)` - If deletion failed
570///
571/// # Errors
572///
573/// Returns an error if finalizer cleanup or API operations fail.
574pub async fn delete_clusterbind9provider(
575    client: Client,
576    cluster: ClusterBind9Provider,
577) -> Result<()> {
578    let name = cluster.name_any();
579    info!("Deleting ClusterBind9Provider: {}", name);
580
581    // Deletion is handled via the reconciler through finalizers
582    reconcile_clusterbind9provider(client, cluster).await
583}