Skip to content
1 change: 1 addition & 0 deletions quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions quickwit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ binggan = { version = "0.14" }
bytes = { version = "1", features = ["serde"] }
bytesize = { version = "1.3.0", features = ["serde"] }
bytestring = "1.3.0"
census = "0.4.2"
chitchat = { git = "https://github.com/quickwit-oss/chitchat.git", rev = "54cbc70" }
chrono = { version = "0.4", default-features = false, features = [
"clock",
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ anyhow = { workspace = true }
async-speed-limit = { workspace = true }
async-trait = { workspace = true }
bytesize = { workspace = true }
census = { workspace = true }
coarsetime = { workspace = true }
dyn-clone = { workspace = true }
env_logger = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub mod temp_dir;
pub mod test_utils;
pub mod thread_pool;
pub mod tower;
pub mod tracker;
pub mod type_map;
pub mod uri;

Expand Down
208 changes: 208 additions & 0 deletions quickwit/quickwit-common/src/tracker.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// Copyright 2021-Present Datadog, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Deref;
use std::sync::mpsc::{channel, Receiver, Sender};
use std::sync::{Arc, Mutex};

use census::{Inventory, TrackedObject as InventoredObject};

/// A resource tracker
///
/// This is used to track whether an object is alive (still in use), or if it's dead (no longer
/// used, but not acknowledged). It does not keep any traces of objects that were alive, but were
/// since acknowledged.
#[derive(Clone)]
pub struct Tracker<T: Clone> {
    // Census inventory of the objects currently alive (tracked and not yet dropped).
    inner_inventory: Inventory<T>,
    // Receiving end of the channel through which dropped-but-unacknowledged values come
    // back. Shared behind an Arc so the strong count doubles as a clone counter
    // (see `safe_to_recreate`).
    unacknowledged_drop_receiver: Arc<Mutex<Receiver<T>>>,
    // Sender cloned into every `TrackedObject` so its `Drop` impl can report the value back.
    return_channel: Sender<T>,
}

/// A single tracked object
#[derive(Debug)]
pub struct TrackedObject<T: Clone> {
    // Always `Some` except after `acknowledge()` took the value, or transiently during
    // `drop()`; whether it is still `Some` at drop time decides if the value is reported
    // as dead.
    inner: Option<InventoredObject<T>>,
    // Channel back to the owning `Tracker`, used by `Drop` to report unacknowledged values.
    return_channel: Sender<T>,
}

impl<T: Clone> TrackedObject<T> {
    /// Acknowledge an object.
    ///
    /// Taking `inner` out here means the `Drop` impl will find `None` and will
    /// not report this value as dead through the return channel.
    pub fn acknowledge(mut self) {
        self.inner.take();
    }

    /// Create an untracked object mostly for tests
    pub fn untracked(value: T) -> Self {
        // The temporary tracker is dropped immediately, so its receiver goes away and
        // the drop notification for this object is silently discarded.
        Tracker::new().track(value)
    }

    /// Create an object which is tracked only as long as it's alive,
    /// but not once it's dead.
    /// The object is tracked through the provided census inventory
    pub fn track_alive_in(value: T, inventory: &Inventory<T>) -> Self {
        TrackedObject {
            inner: Some(inventory.track(value)),
            // Deliberately disconnected sender: the receiver half of this fresh channel
            // is dropped right away, so the death notification goes nowhere.
            return_channel: channel().0,
        }
    }
}

impl<T: Clone> AsRef<T> for TrackedObject<T> {
fn as_ref(&self) -> &T {
self
}
}

impl<T: Clone> Deref for TrackedObject<T> {
    type Target = T;

    // `inner` is only `None` after `acknowledge()` consumed `self`, or while `drop()`
    // is running, so this `expect` cannot fire for ordinary callers.
    fn deref(&self) -> &T {
        self.inner
            .as_ref()
            .expect("inner should only be None during drop")
    }
}

impl<T: Clone> Drop for TrackedObject<T> {
    fn drop(&mut self) {
        // `inner` is still `Some` iff the object was never acknowledged: in that case,
        // send a clone of the value back to the tracker so it shows up in `take_dead()`.
        if let Some(item) = self.inner.take() {
            // if send fails, no one cared about getting that notification, it's fine to
            // drop item
            let _ = self.return_channel.send(item.as_ref().clone());
        }
    }
}

impl<T: Clone> Default for Tracker<T> {
fn default() -> Self {
Self::new()
}
}

impl<T: Clone> Tracker<T> {
    /// Create a new, empty tracker.
    pub fn new() -> Self {
        let (sender, receiver) = channel();
        Tracker {
            inner_inventory: Inventory::new(),
            unacknowledged_drop_receiver: Arc::new(Mutex::new(receiver)),
            return_channel: sender,
        }
    }

    /// Return whether it is safe to recreate this tracker.
    ///
    /// A tracker is considered safe to recreate if this is the only instance left,
    /// and it contains no alive object (it may contain dead objects though).
    ///
    /// Once this return true, it will stay that way until [Tracker::track] or [Tracker::clone] are
    /// called.
    pub fn safe_to_recreate(&self) -> bool {
        // The Arc around the receiver is cloned exactly once per tracker clone, so its
        // strong count tells us whether other tracker instances exist.
        Arc::strong_count(&self.unacknowledged_drop_receiver) == 1
            && self.inner_inventory.len() == 0
    }

    /// List objects which are considered alive.
    pub fn list_ongoing(&self) -> Vec<InventoredObject<T>> {
        self.inner_inventory.list()
    }

    /// Take away the list of objects considered dead.
    ///
    /// The dead objects are drained from the notification channel, so a given dead
    /// object is only ever returned once, by whichever tracker clone asked first.
    pub fn take_dead(&self) -> Vec<T> {
        let receiver = self
            .unacknowledged_drop_receiver
            .lock()
            .expect("tracker receiver mutex poisoned");
        // try_iter() yields every message already queued without blocking.
        receiver.try_iter().collect()
    }

    /// Track a new object.
    pub fn track(&self, value: T) -> TrackedObject<T> {
        TrackedObject {
            inner: Some(self.inner_inventory.track(value)),
            return_channel: self.return_channel.clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{InventoredObject, Tracker};

    /// Asserts that the tracked objects in `got` wrap exactly the values of `expected`,
    /// in order.
    #[track_caller]
    fn assert_tracked_eq<T: PartialEq + std::fmt::Debug>(
        got: Vec<InventoredObject<T>>,
        expected: Vec<T>,
    ) {
        assert_eq!(
            got.len(),
            expected.len(),
            "expected vec of same length, {} != {}",
            got.len(),
            expected.len()
        );
        for (got_item, expected_item) in got.into_iter().zip(expected) {
            assert_eq!(*got_item, expected_item);
        }
    }

    #[test]
    fn test_single_tracker() {
        let tracker = Tracker::<u32>::new();

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker.take_dead().is_empty());
        assert!(tracker.safe_to_recreate());

        {
            let tracked_1 = tracker.track(1);
            assert_tracked_eq(tracker.list_ongoing(), vec![1]);
            assert!(tracker.take_dead().is_empty());
            assert!(!tracker.safe_to_recreate());
            std::mem::drop(tracked_1); // done for clarity and silence unused var warn
        }

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker.safe_to_recreate());
        assert_eq!(tracker.take_dead(), vec![1]);
        assert!(tracker.safe_to_recreate());
    }

    #[test]
    fn test_two_tracker() {
        let tracker = Tracker::<u32>::new();
        let tracker2 = tracker.clone();

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker.take_dead().is_empty());
        // two clones exist, so recreating is not safe
        assert!(!tracker.safe_to_recreate());

        {
            let tracked_1 = tracker.track(1);
            assert_tracked_eq(tracker.list_ongoing(), vec![1]);
            assert_tracked_eq(tracker2.list_ongoing(), vec![1]);
            assert!(tracker.take_dead().is_empty());
            assert!(tracker2.take_dead().is_empty());
            assert!(!tracker.safe_to_recreate());
            std::mem::drop(tracked_1); // done for clarity and silence unused var warn
        }

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker2.list_ongoing().is_empty());
        assert_eq!(tracker2.take_dead(), vec![1]);
        // we took away the dead from tracker2, so they don't show up in tracker
        assert!(tracker.take_dead().is_empty());
    }
}
126 changes: 124 additions & 2 deletions quickwit/quickwit-indexing/failpoints/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
//! Below we test panics at different steps in the indexing pipeline.

use std::path::Path;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use std::time::Duration;

Expand All @@ -42,15 +43,17 @@ use quickwit_common::split_file;
use quickwit_common::temp_dir::TempDirectory;
use quickwit_indexing::actors::MergeExecutor;
use quickwit_indexing::merge_policy::{MergeOperation, MergeTask};
use quickwit_indexing::models::MergeScratch;
use quickwit_indexing::models::{
DetachIndexingPipeline, DetachMergePipeline, MergeScratch, SpawnPipeline,
};
use quickwit_indexing::{get_tantivy_directory_from_split_bundle, TestSandbox};
use quickwit_metastore::{
ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitMetadata,
SplitState,
};
use quickwit_proto::indexing::MergePipelineId;
use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService};
use quickwit_proto::types::{IndexUid, NodeId};
use quickwit_proto::types::{IndexUid, NodeId, PipelineUid};
use serde_json::Value as JsonValue;
use tantivy::Directory;

Expand Down Expand Up @@ -346,3 +349,122 @@ async fn test_merge_executor_controlled_directory_kill_switch() -> anyhow::Resul

Ok(())
}

#[tokio::test]
async fn test_no_duplicate_merge_on_pipeline_restart() -> anyhow::Result<()> {
    quickwit_common::setup_logging_for_tests();
    // Index with a fast timestamp field so the merge policy below applies.
    let doc_mapper_yaml = r#"
        field_mappings:
          - name: body
            type: text
          - name: ts
            type: datetime
            fast: true
        timestamp_field: ts
    "#;
    // `max_finalize_merge_operations: 1` allows exactly one merge to run while the
    // pipeline shuts down; the test verifies that merge is not re-run after restart.
    let indexing_setting_yaml = r#"
        split_num_docs_target: 2500
        merge_policy:
            type: "limit_merge"
            max_merge_ops: 1
            merge_factor: 4
            max_merge_factor: 4
            max_finalize_merge_operations: 1
    "#;
    let search_fields = ["body"];
    let index_id = "test-index-merge-duplication";
    let mut test_index_builder = TestSandbox::create(
        index_id,
        doc_mapper_yaml,
        indexing_setting_yaml,
        &search_fields,
    )
    .await?;

    // State machine advanced by the failpoint callback below:
    // 0: start
    // 1: 1st merge reached the failpoint
    // 11: 1st merge failed
    // 12: 2nd merge reached the failpoint
    // 22: 2nd merge failed (we don't care about this state)
    let state = Arc::new(AtomicU32::new(0));
    let state_clone = state.clone();

    // Every merge execution bumps the counter, sleeps long enough for the test to
    // observe the intermediate state, bumps again, then panics to kill the pipeline.
    fail::cfg_callback("before-merge-split", move || {
        // NOTE(review): this inner import is redundant — `Ordering` is already imported
        // at the top of the file.
        use std::sync::atomic::Ordering;
        state_clone.fetch_add(1, Ordering::Relaxed);
        std::thread::sleep(std::time::Duration::from_millis(300));
        state_clone.fetch_add(10, Ordering::Relaxed);
        panic!("kill merge pipeline");
    })
    .unwrap();

    // NOTE(review): the JSON key is "body " (trailing space) while the doc mapping
    // declares "body" — presumably a typo; the test seems to rely only on doc volume,
    // not on searching "body". TODO confirm and fix separately.
    let batch: Vec<JsonValue> =
        std::iter::repeat_with(|| serde_json::json!({"body ": TEST_TEXT, "ts": 1631072713 }))
            .take(500)
            .collect();
    // this sometime fails because the ingest api isn't aware of the index yet?!
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    // Ingest 4 batches: with merge_factor 4, this yields enough splits for one merge.
    for _ in 0..4 {
        test_index_builder
            .add_documents_through_api(batch.clone())
            .await?;
    }

    let (indexing_pipeline, merge_pipeline) = test_index_builder
        .take_indexing_and_merge_pipeline()
        .await?;

    // stop the pipeline
    indexing_pipeline.kill().await;
    merge_pipeline
        .mailbox()
        .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline)
        .await?;

    // Restart a fresh pipeline while the 1st (finalize) merge is still stuck inside
    // the failpoint sleep.
    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
    let pipeline_id = test_index_builder
        .indexing_service()
        .ask_for_res(SpawnPipeline {
            index_id: index_id.to_string(),
            source_config: quickwit_config::SourceConfig::ingest_api_default(),
            pipeline_uid: PipelineUid::for_test(1u128),
        })
        .await?;

    tokio::time::sleep(std::time::Duration::from_millis(200)).await;
    // we shouldn't have had a 2nd split run yet (the 1st one hasn't panicked just yet)
    assert_eq!(state.load(Ordering::Relaxed), 1);
    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
    // by now the 300ms sleep elapsed: the 1st merge bumped the state again and panicked
    assert_eq!(state.load(Ordering::Relaxed), 11);

    // Detach both pipelines of the restarted indexer so we can shut them down manually.
    let merge_pipeline_id = pipeline_id.merge_pipeline_id();
    let indexing_pipeline = test_index_builder
        .indexing_service()
        .ask_for_res(DetachIndexingPipeline { pipeline_id })
        .await?;
    let merge_pipeline = test_index_builder
        .indexing_service()
        .ask_for_res(DetachMergePipeline {
            pipeline_id: merge_pipeline_id,
        })
        .await?;

    indexing_pipeline.kill().await;
    merge_pipeline
        .mailbox()
        .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline)
        .await?;

    // stopping the merge pipeline makes it recheck for possible dead merge
    // (alternatively, it does that sooner when rebuilding the known split list)
    tokio::time::sleep(std::time::Duration::from_millis(200)).await;
    // timing-wise, we can't have reached 22, but it would be logically correct to get that state
    assert_eq!(state.load(Ordering::Relaxed), 12);

    let universe = test_index_builder.universe();
    universe.kill();
    fail::cfg("before-merge-split", "off").unwrap();
    universe.quit().await;

    Ok(())
}
Loading