bindy/reconcilers/bind9instance/
mod.rs

1// Copyright (c) 2025 Erick Bourgeois, firestoned
2// SPDX-License-Identifier: MIT
3
4//! BIND9 instance reconciliation logic.
5//!
6//! This module handles the lifecycle of BIND9 DNS server deployments in Kubernetes.
7//! It creates and manages Deployments, `ConfigMaps`, and Services for each `Bind9Instance`.
8//!
9//! ## Module Structure
10//!
11//! - [`cluster_helpers`] - Cluster integration and reference management
12//! - [`config`] - RNDC configuration precedence resolution
13//! - [`resources`] - Resource lifecycle (`ConfigMap`, Deployment, Service)
14//! - [`status_helpers`] - Status calculation and updates
15//! - [`types`] - Shared types and imports
16//! - [`zones`] - Zone reconciliation logic
17
18// Submodules
19pub mod cluster_helpers;
20pub mod config;
21pub mod resources;
22pub mod status_helpers;
23pub mod types;
24pub mod zones;
25
26// Re-export public APIs for external use
27pub use zones::reconcile_instance_zones;
28
29// Internal imports
30use cluster_helpers::{build_cluster_reference, fetch_cluster_info};
31use resources::{create_or_update_resources, delete_resources};
32use status_helpers::{update_status, update_status_from_deployment};
33#[allow(clippy::wildcard_imports)]
34use types::*;
35use zones::reconcile_instance_zones as reconcile_zones_internal;
36
37use crate::reconcilers::finalizers::{ensure_finalizer, handle_deletion, FinalizerCleanup};
38
39/// Calculate the requeue duration for the next reconciliation based on RNDC rotation schedule.
40///
41/// If auto-rotation is enabled and a rotation time is scheduled, this function calculates
42/// the duration until that rotation time. If the rotation is overdue, it returns a minimal
43/// duration to trigger immediate reconciliation.
44///
45/// # Arguments
46///
47/// * `config` - RNDC configuration with rotation settings
48/// * `secret` - The RNDC Secret with rotation annotations
49///
50/// # Returns
51///
52/// Duration until next reconciliation. Returns `None` if rotation is disabled or Secret
53/// has no rotation annotations.
54///
55/// # Examples
56///
57/// ```rust,ignore
58/// use bindy::crd::RndcKeyConfig;
59/// use k8s_openapi::api::core::v1::Secret;
60/// use bindy::reconcilers::bind9instance::calculate_requeue_duration;
61///
62/// let config = RndcKeyConfig {
63///     auto_rotate: true,
64///     rotate_after: "720h".to_string(),
65///     ..Default::default()
66/// };
67///
68/// // Create a secret with rotation annotations
69/// let secret = Secret {
70///     metadata: ObjectMeta {
71///         annotations: Some(BTreeMap::from([
72///             ("bindy.firestoned.io/rotation-created-at".to_string(), "2025-01-01T00:00:00Z".to_string()),
73///             ("bindy.firestoned.io/rotation-rotate-at".to_string(), "2025-02-01T00:00:00Z".to_string()),
74///         ])),
75///         ..Default::default()
76///     },
77///     ..Default::default()
78/// };
79///
80/// // Returns duration until rotate_at timestamp
81/// let duration = calculate_requeue_duration(&config, &secret);
82/// ```
83#[allow(dead_code)] // Will be used when requeue logic is integrated
84fn calculate_requeue_duration(
85    config: &crate::crd::RndcKeyConfig,
86    secret: &Secret,
87) -> Option<std::time::Duration> {
88    use chrono::Utc;
89
90    // Only calculate requeue if auto-rotation is enabled
91    if !config.auto_rotate {
92        return None;
93    }
94
95    // Extract rotation annotations from Secret
96    let annotations = secret.metadata.annotations.as_ref()?;
97    let (_created_at, rotate_at, _rotation_count) =
98        crate::bind9::rndc::parse_rotation_annotations(annotations).ok()?;
99
100    // If no rotation scheduled, no need for specific requeue
101    let rotate_at = rotate_at?;
102
103    let now = Utc::now();
104    let time_until_rotation = rotate_at.signed_duration_since(now);
105
106    // If rotation is overdue or very soon, reconcile quickly (30 seconds)
107    if time_until_rotation.num_seconds() <= 0 {
108        return Some(std::time::Duration::from_secs(30));
109    }
110
111    // Otherwise, schedule reconciliation slightly before rotation time (5 minutes early)
112    let requeue_secs = time_until_rotation
113        .num_seconds()
114        .saturating_sub(300) // 5 minutes early
115        .max(30); // At least 30 seconds
116
117    #[allow(clippy::cast_sign_loss)] // Value is guaranteed non-negative by max(30)
118    Some(std::time::Duration::from_secs(requeue_secs as u64))
119}
120
121/// Update the `Bind9Instance` status with RNDC key rotation information.
122///
123/// Reads rotation metadata from the RNDC Secret annotations and updates the instance
124/// status with current rotation state. This provides visibility into key age and
125/// rotation schedule.
126///
127/// # Arguments
128///
129/// * `client` - Kubernetes API client
130/// * `instance` - The `Bind9Instance` resource to update
131/// * `secret` - The RNDC Secret containing rotation annotations
132/// * `config` - RNDC configuration with rotation settings
133///
134/// # Returns
135///
136/// `Ok(())` on success, error if status update fails.
137///
138/// # Errors
139///
140/// Returns an error if:
141/// - Secret annotations are missing or malformed
142/// - Status patch API call fails
143async fn update_rotation_status(
144    client: &Client,
145    instance: &Bind9Instance,
146    secret: &Secret,
147    config: &crate::crd::RndcKeyConfig,
148) -> Result<()> {
149    use crate::crd::RndcKeyRotationStatus;
150
151    // Only update status if auto-rotation is enabled
152    if !config.auto_rotate {
153        return Ok(());
154    }
155
156    let Some(annotations) = &secret.metadata.annotations else {
157        debug!("Secret has no annotations, skipping rotation status update");
158        return Ok(());
159    };
160
161    let (created_at, rotate_at, rotation_count) =
162        crate::bind9::rndc::parse_rotation_annotations(annotations)?;
163
164    // Determine last_rotated_at: if rotation_count > 0, the current created_at is when it was last rotated
165    let last_rotated_at = if rotation_count > 0 {
166        Some(created_at.to_rfc3339())
167    } else {
168        None
169    };
170
171    let rotation_status = RndcKeyRotationStatus {
172        created_at: created_at.to_rfc3339(),
173        rotate_at: rotate_at.map(|dt| dt.to_rfc3339()),
174        last_rotated_at,
175        rotation_count,
176    };
177
178    // Prepare status update
179    let namespace = instance.namespace().unwrap_or_default();
180    let name = instance.name_any();
181
182    let status = serde_json::json!({
183        "status": {
184            "rndcKeyRotation": rotation_status
185        }
186    });
187
188    let api: Api<Bind9Instance> = Api::namespaced(client.clone(), &namespace);
189    api.patch_status(
190        &name,
191        &PatchParams::default(),
192        &kube::api::Patch::Merge(&status),
193    )
194    .await?;
195
196    debug!(
197        "Updated rotation status for {}/{}: rotation_count={}, rotate_at={:?}",
198        namespace, name, rotation_count, rotate_at
199    );
200
201    Ok(())
202}
203
204/// Implement cleanup trait for `Bind9Instance` finalizer management.
205#[async_trait::async_trait]
206impl FinalizerCleanup for Bind9Instance {
207    async fn cleanup(&self, client: &Client) -> Result<()> {
208        let namespace = self.namespace().unwrap_or_default();
209        let name = self.name_any();
210
211        // Check if this instance is managed by a Bind9Cluster
212        let is_managed: bool = self
213            .metadata
214            .labels
215            .as_ref()
216            .and_then(|labels| labels.get(BINDY_MANAGED_BY_LABEL))
217            .is_some();
218
219        if is_managed {
220            info!(
221                "Bind9Instance {}/{} is managed by a Bind9Cluster, skipping resource cleanup (cluster will handle it)",
222                namespace, name
223            );
224            Ok(())
225        } else {
226            info!(
227                "Running cleanup for standalone Bind9Instance {}/{}",
228                namespace, name
229            );
230            delete_resources(client, &namespace, &name).await
231        }
232    }
233}
234
235/// Reconciles a `Bind9Instance` resource.
236///
237/// Creates or updates all Kubernetes resources needed to run a BIND9 DNS server:
238/// - `ConfigMap` with BIND9 configuration files
239/// - Deployment with BIND9 container pods
240/// - Service for DNS traffic (TCP/UDP port 53)
241///
242/// # Arguments
243///
244/// * `ctx` - Operator context with Kubernetes client and reflector stores
245/// * `instance` - The `Bind9Instance` resource to reconcile
246///
247/// # Returns
248///
249/// * `Ok(())` - If reconciliation succeeded
250/// * `Err(_)` - If resource creation/update failed
251///
252/// # Example
253///
254/// ```rust,no_run
255/// use bindy::reconcilers::reconcile_bind9instance;
256/// use bindy::crd::Bind9Instance;
257/// use bindy::context::Context;
258/// use std::sync::Arc;
259///
260/// async fn handle_instance(ctx: Arc<Context>, instance: Bind9Instance) -> anyhow::Result<()> {
261///     reconcile_bind9instance(ctx, instance).await?;
262///     Ok(())
263/// }
264/// ```
265///
266/// # Errors
267///
268/// Returns an error if Kubernetes API operations fail or resource creation/update fails.
269#[allow(clippy::too_many_lines)]
270pub async fn reconcile_bind9instance(ctx: Arc<Context>, instance: Bind9Instance) -> Result<()> {
271    let client = ctx.client.clone();
272    let namespace = instance.namespace().unwrap_or_default();
273    let name = instance.name_any();
274
275    info!("Reconciling Bind9Instance: {}/{}", namespace, name);
276    debug!(
277        namespace = %namespace,
278        name = %name,
279        generation = ?instance.metadata.generation,
280        "Starting Bind9Instance reconciliation"
281    );
282
283    // Check if the instance is being deleted
284    if instance.metadata.deletion_timestamp.is_some() {
285        return handle_deletion(&client, &instance, FINALIZER_BIND9_INSTANCE).await;
286    }
287
288    // Add finalizer if not present
289    ensure_finalizer(&client, &instance, FINALIZER_BIND9_INSTANCE).await?;
290
291    let spec = &instance.spec;
292    let replicas = spec.replicas.unwrap_or(1);
293    let version = spec
294        .version
295        .as_deref()
296        .unwrap_or(crate::constants::DEFAULT_BIND9_VERSION);
297
298    debug!(
299        cluster_ref = %spec.cluster_ref,
300        replicas,
301        version = %version,
302        role = ?spec.role,
303        "Instance configuration"
304    );
305
306    info!(
307        "Bind9Instance {} configured with {} replicas, version {}",
308        name, replicas, version
309    );
310
311    // Check if spec has changed using the standard generation check
312    let current_generation = instance.metadata.generation;
313    let observed_generation = instance.status.as_ref().and_then(|s| s.observed_generation);
314
315    // Check if this instance is managed by a Bind9Cluster
316    let is_managed: bool = instance
317        .metadata
318        .labels
319        .as_ref()
320        .and_then(|labels| labels.get(BINDY_MANAGED_BY_LABEL))
321        .is_some();
322
323    // Fetch cluster information early for rotation checking and zone reconciliation
324    // We need this to set the cluster reference in DNSZone status
325    let (cluster, cluster_provider) = fetch_cluster_info(&client, &namespace, &instance).await;
326
327    // Check if parent cluster configuration has changed since last reconciliation
328    // This is critical for detecting when RNDC config is added/changed at the cluster level
329    let parent_config_changed = {
330        // Check Bind9Cluster generation
331        let cluster_changed = if let Some(ref c) = cluster {
332            let cluster_generation = c.metadata.generation.unwrap_or(0);
333            let instance_observed_gen = observed_generation.unwrap_or(0);
334
335            // If cluster generation is newer than when we last reconciled, parent config may have changed
336            // Note: This is a heuristic since we don't track parent's observed generation separately
337            // We compare against instance's observed_generation as a proxy for "last reconciliation time"
338            if cluster_generation > instance_observed_gen {
339                debug!(
340                    "Parent Bind9Cluster generation ({}) is newer than instance observed generation ({}), checking for config changes",
341                    cluster_generation, instance_observed_gen
342                );
343                true
344            } else {
345                false
346            }
347        } else {
348            false
349        };
350
351        // Check ClusterBind9Provider generation
352        let provider_changed = if let Some(ref cp) = cluster_provider {
353            let provider_generation = cp.metadata.generation.unwrap_or(0);
354            let instance_observed_gen = observed_generation.unwrap_or(0);
355
356            // Same heuristic: if provider generation is newer, config may have changed
357            if provider_generation > instance_observed_gen {
358                debug!(
359                    "Parent ClusterBind9Provider generation ({}) is newer than instance observed generation ({}), checking for config changes",
360                    provider_generation, instance_observed_gen
361                );
362                true
363            } else {
364                false
365            }
366        } else {
367            false
368        };
369
370        cluster_changed || provider_changed
371    };
372
373    if parent_config_changed {
374        info!(
375            "Parent cluster configuration may have changed for Bind9Instance {}/{}, will check for drift",
376            namespace, name
377        );
378    }
379
380    // Check if ALL required resources actually exist AND match desired state (drift detection)
381    let (all_resources_exist, deployment_labels_match, rotation_needed) = {
382        let deployment_api: Api<Deployment> = Api::namespaced(client.clone(), &namespace);
383        let service_api: Api<Service> = Api::namespaced(client.clone(), &namespace);
384        let configmap_api: Api<ConfigMap> = Api::namespaced(client.clone(), &namespace);
385        let secret_api: Api<Secret> = Api::namespaced(client.clone(), &namespace);
386
387        // Fetch deployment to check if it exists AND if OUR labels match
388        let (deployment_exists, labels_match) = match deployment_api.get(&name).await {
389            Ok(deployment) => {
390                // Build desired labels from instance - these are the labels WE manage
391                let desired_labels =
392                    crate::bind9_resources::build_labels_from_instance(&name, &instance);
393
394                // Check if deployment has all OUR labels with correct values
395                // IMPORTANT: Only check labels we explicitly set via build_labels_from_instance()
396                // Other controllers or users may add additional labels - we don't care about those
397                let labels_match = if let Some(actual_labels) = &deployment.metadata.labels {
398                    desired_labels
399                        .iter()
400                        .all(|(key, value)| actual_labels.get(key) == Some(value))
401                } else {
402                    false // No labels at all = no match
403                };
404
405                (true, labels_match)
406            }
407            Err(_) => (false, false),
408        };
409
410        let service_exists = service_api.get(&name).await.is_ok();
411
412        // Check ConfigMap - managed instances use cluster ConfigMap, standalone use instance ConfigMap
413        let configmap_name = if is_managed {
414            format!("{}-config", spec.cluster_ref)
415        } else {
416            format!("{name}-config")
417        };
418        let configmap_exists = configmap_api.get(&configmap_name).await.is_ok();
419
420        // Check Secret existence AND rotation status
421        let secret_name = format!("{name}-rndc-key");
422        let (secret_exists, needs_rotation) = match secret_api.get(&secret_name).await {
423            Ok(secret) => {
424                // Resolve RNDC config to check if rotation is due
425                let rndc_config = resources::resolve_full_rndc_config(
426                    &instance,
427                    cluster.as_ref(),
428                    cluster_provider.as_ref(),
429                );
430
431                // Check if rotation is needed using the existing function
432                let needs_rotation =
433                    resources::should_rotate_secret(&secret, &rndc_config).unwrap_or(false);
434
435                if needs_rotation {
436                    debug!(
437                        "RNDC Secret {}/{} rotation is due, will trigger reconciliation",
438                        namespace, secret_name
439                    );
440                }
441
442                (true, needs_rotation)
443            }
444            Err(_) => (false, false),
445        };
446
447        let all_exist = deployment_exists && service_exists && configmap_exists && secret_exists;
448        (all_exist, labels_match, needs_rotation)
449    };
450    let cluster_ref = build_cluster_reference(cluster.as_ref(), cluster_provider.as_ref());
451
452    if let Some(ref cr) = cluster_ref {
453        debug!(
454            "Built cluster reference for instance {}/{}: {}/{} in namespace {:?}",
455            namespace, name, cr.kind, cr.name, cr.namespace
456        );
457    } else {
458        debug!(
459            "No cluster reference built for instance {}/{} - spec.clusterRef may be empty or cluster not found",
460            namespace, name
461        );
462    }
463
464    // Only reconcile resources if:
465    // 1. Spec changed (generation mismatch), OR
466    // 2. We haven't processed this resource yet (no observed_generation), OR
467    // 3. Resources are missing (drift detected), OR
468    // 4. RNDC Secret rotation is due, OR
469    // 5. Parent cluster configuration has changed
470    let should_reconcile =
471        crate::reconcilers::should_reconcile(current_generation, observed_generation);
472
473    // REMOVED: Zone discovery logic - instances no longer select zones
474    // Zone selection is now reversed: DNSZone.spec.bind9_instances_from selects instances
475    // This logic was removed as part of the architectural change to reverse selector direction
476
477    if !should_reconcile
478        && all_resources_exist
479        && deployment_labels_match
480        && !rotation_needed
481        && !parent_config_changed
482    {
483        debug!(
484            "Spec unchanged (generation={:?}), all resources exist, deployment labels match, no rotation needed, and parent config unchanged - skipping resource reconciliation",
485            current_generation
486        );
487        // Update status from current deployment state (only patches if status changed)
488        // Preserve existing cluster_ref from instance status if available
489        let cluster_ref = instance.status.as_ref().and_then(|s| s.cluster_ref.clone());
490        update_status_from_deployment(&client, &namespace, &name, &instance, cluster_ref).await?;
491
492        // Reconcile zones after status update
493        reconcile_zones_internal(&client, &ctx.stores, &instance).await?;
494
495        return Ok(());
496    }
497
498    // If we reach here, reconciliation is needed because:
499    // - Spec changed (generation mismatch), OR
500    // - Resources don't exist (drift), OR
501    // - Deployment labels don't match desired state (drift), OR
502    // - RNDC Secret rotation is due, OR
503    // - Parent cluster configuration has changed
504    if !deployment_labels_match && all_resources_exist {
505        info!(
506            "Deployment labels don't match desired state for {}/{}, triggering reconciliation to update labels",
507            namespace, name
508        );
509    }
510
511    if !should_reconcile && !all_resources_exist {
512        info!(
513            "Drift detected for Bind9Instance {}/{}: One or more resources missing, will recreate",
514            namespace, name
515        );
516    }
517
518    if rotation_needed {
519        info!(
520            "RNDC Secret rotation is due for Bind9Instance {}/{}, triggering reconciliation",
521            namespace, name
522        );
523    }
524
525    if parent_config_changed {
526        info!(
527            "Parent cluster configuration changed for Bind9Instance {}/{}, triggering reconciliation to apply new config",
528            namespace, name
529        );
530    }
531
532    debug!(
533        "Reconciliation needed: current_generation={:?}, observed_generation={:?}",
534        current_generation, observed_generation
535    );
536
537    // Create or update resources
538    match create_or_update_resources(&client, &namespace, &name, &instance).await {
539        Ok((cluster, cluster_provider, secret)) => {
540            info!(
541                "Successfully created/updated resources for {}/{}",
542                namespace, name
543            );
544
545            // Build cluster reference for status
546            let cluster_ref = build_cluster_reference(cluster.as_ref(), cluster_provider.as_ref());
547
548            // Update status based on actual deployment state
549            update_status_from_deployment(&client, &namespace, &name, &instance, cluster_ref)
550                .await?;
551
552            // Update rotation status if Secret is available
553            if let Some(ref secret) = secret {
554                // Resolve RNDC config for rotation status update
555                let rndc_config = resources::resolve_full_rndc_config(
556                    &instance,
557                    cluster.as_ref(),
558                    cluster_provider.as_ref(),
559                );
560
561                if let Err(e) =
562                    update_rotation_status(&client, &instance, secret, &rndc_config).await
563                {
564                    warn!(
565                        "Failed to update rotation status for {}/{}: {}",
566                        namespace, name, e
567                    );
568                    // Non-fatal error, continue reconciliation
569                }
570            }
571
572            // Reconcile zones after deployment creation/update
573            reconcile_zones_internal(&client, &ctx.stores, &instance).await?;
574        }
575        Err(e) => {
576            error!(
577                "Failed to create/update resources for {}/{}: {}",
578                namespace, name, e
579            );
580
581            // Update status to show error
582            let error_condition = Condition {
583                r#type: CONDITION_TYPE_READY.to_string(),
584                status: "False".to_string(),
585                reason: Some(REASON_NOT_READY.to_string()),
586                message: Some(format!("Failed to create resources: {e}")),
587                last_transition_time: Some(Utc::now().to_rfc3339()),
588            };
589            // No cluster info available on error, pass None
590            update_status(&client, &instance, vec![error_condition], None).await?;
591
592            return Err(e);
593        }
594    }
595
596    Ok(())
597}
598
599/// Delete handler for `Bind9Instance` resources (cleanup logic).
600///
601/// This function is kept for backward compatibility but deletion is now handled
602/// by the finalizer in `reconcile_bind9instance`.
603///
604/// # Errors
605///
606/// This function currently never returns an error, but returns `Result` for API consistency.
607pub async fn delete_bind9instance(ctx: Arc<Context>, instance: Bind9Instance) -> Result<()> {
608    let _client = ctx.client.clone();
609    let namespace = instance.namespace().unwrap_or_default();
610    let name = instance.name_any();
611
612    info!(
613        "Delete called for Bind9Instance {}/{} (handled by finalizer)",
614        namespace, name
615    );
616
617    // Deletion is now handled by the finalizer in reconcile_bind9instance
618    Ok(())
619}
620
621#[cfg(test)]
622#[path = "mod_tests.rs"]
623mod mod_tests;