bindy/reconcilers/dnszone/
cleanup.rs

1// Copyright (c) 2025 Erick Bourgeois, firestoned
2// SPDX-License-Identifier: MIT
3
4//! Cleanup operations for DNS zones.
5//!
6//! This module handles cleanup of deleted instances and stale records from zone status.
7
8use anyhow::Result;
9use kube::Client;
10use tracing::{debug, info, warn};
11
12use crate::crd::DNSZone;
13
14/// Clean up deleted instances from zone status.
15///
16/// Iterates through instances in zone status and removes any that no longer exist
17/// in the Kubernetes API.
18///
19/// # Arguments
20///
21/// * `client` - Kubernetes client
22/// * `dnszone` - The DNSZone resource being reconciled
23/// * `status_updater` - Status updater for modifying zone status
24///
25/// # Returns
26///
27/// Number of instances removed from status
28///
29/// # Errors
30///
31/// Returns an error if Kubernetes API calls fail critically.
32pub async fn cleanup_deleted_instances(
33    client: &Client,
34    dnszone: &DNSZone,
35    status_updater: &mut crate::reconcilers::status::DNSZoneStatusUpdater,
36) -> Result<usize> {
37    use crate::crd::Bind9Instance;
38    use kube::{Api, ResourceExt};
39
40    let namespace = dnszone.namespace().unwrap_or_default();
41    let zone_name = &dnszone.spec.zone_name;
42
43    // Get current instances from status
44    let current_instances = dnszone
45        .status
46        .as_ref()
47        .map(|s| s.bind9_instances.clone())
48        .unwrap_or_default();
49
50    if current_instances.is_empty() {
51        debug!(
52            "No instances in status for zone {}/{} - skipping cleanup",
53            namespace, zone_name
54        );
55        return Ok(0);
56    }
57
58    info!(
59        "Cleaning up deleted instances for zone {}/{}: checking {} instance(s)",
60        namespace,
61        zone_name,
62        current_instances.len()
63    );
64
65    let mut deleted_count = 0;
66
67    // Check each instance to see if it still exists
68    for instance_ref in current_instances {
69        let instance_api: Api<Bind9Instance> =
70            Api::namespaced(client.clone(), &instance_ref.namespace);
71
72        let instance_exists = instance_api.get(&instance_ref.name).await.is_ok();
73
74        if !instance_exists {
75            info!(
76                "Instance {}/{} no longer exists - removing from zone {}/{}",
77                instance_ref.namespace, instance_ref.name, namespace, zone_name
78            );
79            status_updater.remove_instance(&instance_ref.name, &instance_ref.namespace);
80            deleted_count += 1;
81        }
82    }
83
84    Ok(deleted_count)
85}
86
87/// Clean up stale records from zone status.
88///
89/// Iterates through records in zone status and removes any that no longer exist
90/// in the Kubernetes API. Also performs self-healing by deleting orphaned records
91/// from BIND9 if they were missed by finalizers.
92///
93/// # Arguments
94///
95/// * `client` - Kubernetes client
96/// * `dnszone` - The DNSZone resource being reconciled
97/// * `status_updater` - Status updater for modifying zone status
98/// * `bind9_instances_store` - Reflector store for querying Bind9Instance resources
99///
100/// # Returns
101///
102/// Number of records removed from status
103///
104/// # Errors
105///
106/// Returns an error if API calls fail critically (non-NotFound errors).
107#[allow(clippy::too_many_lines)]
108pub async fn cleanup_stale_records(
109    client: &Client,
110    dnszone: &DNSZone,
111    status_updater: &mut crate::reconcilers::status::DNSZoneStatusUpdater,
112    bind9_instances_store: &kube::runtime::reflector::Store<crate::crd::Bind9Instance>,
113) -> Result<usize> {
114    use crate::bind9::records::query_dns_record;
115    use crate::crd::{
116        AAAARecord, ARecord, CAARecord, CNAMERecord, DNSRecordKind, MXRecord, NSRecord,
117        RecordReferenceWithTimestamp, SRVRecord, TXTRecord,
118    };
119    use kube::{Api, ResourceExt};
120
121    let namespace = dnszone.namespace().unwrap_or_default();
122    let zone_name = &dnszone.spec.zone_name;
123
124    // Get current records from status
125    let current_records = dnszone
126        .status
127        .as_ref()
128        .map(|s| s.records.clone())
129        .unwrap_or_default();
130
131    if current_records.is_empty() {
132        debug!(
133            "No records in status for zone {}/{} - skipping cleanup",
134            namespace, zone_name
135        );
136        return Ok(0);
137    }
138
139    info!(
140        "Cleaning up stale records for zone {}/{}: checking {} record(s)",
141        namespace,
142        zone_name,
143        current_records.len()
144    );
145
146    // Get instances to query DNS and delete if needed
147    let instance_refs = super::validation::get_instances_from_zone(dnszone, bind9_instances_store)?;
148    let primary_refs = super::primary::filter_primary_instances(client, &instance_refs).await?;
149
150    let mut records_to_keep: Vec<RecordReferenceWithTimestamp> = Vec::new();
151    let mut stale_count = 0;
152
153    // Check each record to see if it still exists
154    for record_ref in current_records {
155        let kind = DNSRecordKind::from(record_ref.kind.as_str());
156        let record_exists = match kind {
157            DNSRecordKind::A => {
158                let api: Api<ARecord> = Api::namespaced(client.clone(), &record_ref.namespace);
159                api.get(&record_ref.name).await.is_ok()
160            }
161            DNSRecordKind::AAAA => {
162                let api: Api<AAAARecord> = Api::namespaced(client.clone(), &record_ref.namespace);
163                api.get(&record_ref.name).await.is_ok()
164            }
165            DNSRecordKind::TXT => {
166                let api: Api<TXTRecord> = Api::namespaced(client.clone(), &record_ref.namespace);
167                api.get(&record_ref.name).await.is_ok()
168            }
169            DNSRecordKind::CNAME => {
170                let api: Api<CNAMERecord> = Api::namespaced(client.clone(), &record_ref.namespace);
171                api.get(&record_ref.name).await.is_ok()
172            }
173            DNSRecordKind::MX => {
174                let api: Api<MXRecord> = Api::namespaced(client.clone(), &record_ref.namespace);
175                api.get(&record_ref.name).await.is_ok()
176            }
177            DNSRecordKind::NS => {
178                let api: Api<NSRecord> = Api::namespaced(client.clone(), &record_ref.namespace);
179                api.get(&record_ref.name).await.is_ok()
180            }
181            DNSRecordKind::SRV => {
182                let api: Api<SRVRecord> = Api::namespaced(client.clone(), &record_ref.namespace);
183                api.get(&record_ref.name).await.is_ok()
184            }
185            DNSRecordKind::CAA => {
186                let api: Api<CAARecord> = Api::namespaced(client.clone(), &record_ref.namespace);
187                api.get(&record_ref.name).await.is_ok()
188            }
189        };
190
191        if record_exists {
192            // Record still exists in Kubernetes - keep it in status
193            // The record reconciler will handle updating BIND9
194            debug!(
195                "Record {} {}/{} still exists - keeping in status",
196                record_ref.kind, record_ref.namespace, record_ref.name
197            );
198            records_to_keep.push(record_ref);
199        } else {
200            // Record doesn't exist in Kubernetes - need to clean up
201            info!(
202                "Record {} {}/{} no longer exists in Kubernetes",
203                record_ref.kind, record_ref.namespace, record_ref.name
204            );
205
206            // Self-healing: Check if record still exists in BIND9 and delete if found
207            // This catches cases where the finalizer failed to delete
208            let kind = DNSRecordKind::from(record_ref.kind.as_str());
209            let record_type = kind.to_hickory_record_type();
210
211            // Extract DNS record name and zone from RecordReference
212            // These fields are populated from spec.name when the record is discovered
213            let dns_record_name = if let Some(name) = &record_ref.record_name {
214                name.as_str()
215            } else {
216                warn!(
217                    "Record {} {}/{} has no recordName in status - skipping BIND9 cleanup",
218                    record_ref.kind, record_ref.namespace, record_ref.name
219                );
220                stale_count += 1;
221                continue;
222            };
223
224            // Check BIND9 on all primary instances and delete if found
225            // Use for_each_instance_endpoint to iterate over all primary endpoints
226            let dns_record_name_clone = dns_record_name.to_string();
227            let dns_zone_name_clone = zone_name.clone();
228            let record_kind = record_ref.kind.clone();
229            let record_namespace = record_ref.namespace.clone();
230            let record_name = record_ref.name.clone();
231
232            // Query and potentially delete from each primary instance
233            let _ = super::helpers::for_each_instance_endpoint(
234                client,
235                &primary_refs,
236                true,      // with_rndc_key (needed for deletion)
237                "dns-tcp", // Use DNS TCP port for queries and updates
238                |pod_endpoint, _instance_name, rndc_key| {
239                    let server = pod_endpoint.clone();
240                    let zone = dns_zone_name_clone.clone();
241                    let dns_name = dns_record_name_clone.clone();
242                    let r_type = record_type;
243                    let r_kind = record_kind.clone();
244                    let r_namespace = record_namespace.clone();
245                    let r_name = record_name.clone();
246
247                    async move {
248                        // Query DNS to check if record exists
249                        match query_dns_record(&zone, &dns_name, r_type, &server).await {
250                            Ok(records) if !records.is_empty() => {
251                                warn!(
252                                    "SELF-HEALING: Record {} {}/{} deleted from K8s but still exists in BIND9 on {}",
253                                    r_kind, r_namespace, r_name, server
254                                );
255
256                                // Delete from BIND9 using the RNDC key
257                                if let Some(key_data) = rndc_key {
258                                    match crate::bind9::records::delete_dns_record(
259                                        &zone,
260                                        &dns_name,
261                                        r_type,
262                                        &server,
263                                        &key_data,
264                                    )
265                                    .await
266                                    {
267                                        Ok(()) => {
268                                            info!(
269                                                "SELF-HEALING: Successfully deleted orphaned {} record {} from BIND9 on {}",
270                                                r_kind, dns_name, server
271                                            );
272                                        }
273                                        Err(e) => {
274                                            warn!(
275                                                "SELF-HEALING: Failed to delete orphaned record from BIND9 on {}: {}",
276                                                server, e
277                                            );
278                                        }
279                                    }
280                                } else {
281                                    warn!(
282                                        "No RNDC key available for {} - cannot delete orphaned record",
283                                        server
284                                    );
285                                }
286                            }
287                            Ok(_) => {
288                                // Record doesn't exist in BIND9 - good, finalizer worked
289                                debug!(
290                                    "Record {} not found in BIND9 on {} - already cleaned up",
291                                    dns_name, server
292                                );
293                            }
294                            Err(e) => {
295                                debug!(
296                                    "Failed to query DNS on {} for {} (may not exist): {}",
297                                    server, dns_name, e
298                                );
299                            }
300                        }
301
302                        Ok(())
303                    }
304                },
305            )
306            .await;
307
308            // Remove from status regardless of whether we found it in BIND9
309            stale_count += 1;
310        }
311    }
312
313    // Update status with cleaned records list
314    if stale_count > 0 {
315        status_updater.set_records(&records_to_keep);
316        info!(
317            "Removed {} stale record(s) from zone {}/{} status",
318            stale_count, namespace, zone_name
319        );
320    }
321
322    Ok(stale_count)
323}
324
// Unit tests for this module; compiled only for test builds and loaded from
// `cleanup_tests.rs` via the explicit `#[path]` attribute.
#[cfg(test)]
#[path = "cleanup_tests.rs"]
mod cleanup_tests;