Skip to content

[Draft] Cosmetic filter flatbuffers #496

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: 0.11.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/blocker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,11 @@ impl Blocker {
use crate::filters::fb_builder::FlatBufferBuilder;
use crate::filters::fb_network::FilterDataContext;

let memory =
FlatBufferBuilder::make_flatbuffer(network_filters, options.enable_optimizations);
let memory = FlatBufferBuilder::make_flatbuffer(
network_filters,
&mut Default::default(),
options.enable_optimizations,
);
let filter_data_context = FilterDataContext::new(memory);
Self::from_context(filter_data_context)
}
Expand Down
131 changes: 85 additions & 46 deletions src/cosmetic_filter_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@
use crate::filters::cosmetic::{
CosmeticFilter, CosmeticFilterAction, CosmeticFilterMask, CosmeticFilterOperator,
};
use crate::filters::fb_network::FilterDataContextRef;
use crate::filters::flat_filter_map::{FlatFilterSetView, FlatMapView};
use crate::resources::{PermissionMask, ResourceStorage};
use crate::utils::Hash;

use std::collections::{HashMap, HashSet};
use std::mem;

use memchr::memchr as find_char;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -63,35 +66,30 @@ impl UrlSpecificResources {
/// will be blocked on any particular page, although when used correctly, all provided rules and
/// scriptlets should be safe to apply.
pub(crate) struct CosmeticFilterCache {
/// Handle to the flatbuffer-backed filter data. The generic rule sets (simple/complex class and
/// id rules, misc generic selectors) are read from `filter_data_context.memory.root()`.
filter_data_context: FilterDataContextRef,

/// Hostname-specific rules. These are supplied separately from the flatbuffer context when the
/// cache is constructed, and are serialized separately from the flatbuffer bytes.
pub(crate) specific_rules: HostnameRuleDb,
}

/// The parts of the cosmetic filter state that are passed to `CosmeticFilterCache::from_context`
/// alongside the `FilterDataContextRef` rather than being read from it.
///
/// NOTE(review): the name suggests "fields that are not stored in the flatbuffer" — confirm.
#[derive(Default)]
pub(crate) struct CosmeticFilterNotProtoFields {
/// Hostname-specific rules; moved into `CosmeticFilterCache::specific_rules` by `from_context`.
pub(crate) specific_rules: HostnameRuleDb,
}

/// Collects cosmetic rules into plain `HashSet`/`HashMap` collections before a
/// `CosmeticFilterCache` is created. A `&mut` reference to the builder is handed to
/// `FlatBufferBuilder::make_flatbuffer`, and the hostname-specific part is extracted afterwards
/// with `take_non_proto_fields`.
///
/// NOTE(review): presumably `make_flatbuffer` writes the generic rule sets below into the
/// flatbuffer — confirm against `fb_builder`.
#[derive(Default)]
pub(crate) struct CosmeticFilterCacheBuilder {
/// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`.
pub(crate) simple_class_rules: HashSet<String>,
/// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`.
pub(crate) simple_id_rules: HashSet<String>,
/// Rules that are the CSS selector of an element to be hidden on all sites, starting with a
/// class, e.g. `##.ad image`.
pub(crate) complex_class_rules: HashMap<String, Vec<String>>,
/// Rules that are the CSS selector of an element to be hidden on all sites, starting with an
/// id, e.g. `###banner > .text a`.
pub(crate) complex_id_rules: HashMap<String, Vec<String>>,

/// Hostname-specific rules; moved out of the builder by `take_non_proto_fields`.
pub(crate) specific_rules: HostnameRuleDb,

/// Rules that are the CSS selector of an element to be hidden on all sites that do not fit
/// into any of the class or id buckets above, e.g. `##a[href="https://malware.com"]`
pub(crate) misc_generic_selectors: HashSet<String>,
}

impl CosmeticFilterCache {
pub fn new() -> Self {
Self {
simple_class_rules: HashSet::new(),
simple_id_rules: HashSet::new(),
complex_class_rules: HashMap::new(),
complex_id_rules: HashMap::new(),

specific_rules: HostnameRuleDb::default(),

misc_generic_selectors: HashSet::new(),
impl CosmeticFilterCacheBuilder {
/// Moves the hostname-specific rules out of the builder and returns them wrapped in a
/// `CosmeticFilterNotProtoFields`.
///
/// The builder's `specific_rules` is left holding its `Default` value afterwards, so calling
/// this a second time yields an empty rule database.
pub fn take_non_proto_fields(&mut self) -> CosmeticFilterNotProtoFields {
    let specific_rules = mem::take(&mut self.specific_rules);
    CosmeticFilterNotProtoFields { specific_rules }
}

Expand Down Expand Up @@ -164,6 +162,35 @@ impl CosmeticFilterCache {
self.misc_generic_selectors.insert(selector);
}
}
}

impl CosmeticFilterCache {
pub fn from_context(
filter_data_context: FilterDataContextRef,
not_proto_fields: CosmeticFilterNotProtoFields,
) -> Self {
Self {
filter_data_context,
specific_rules: not_proto_fields.specific_rules,
}
}

// /// Check if a class is in the simple class rules
// fn contains_simple_class_rule(&self, class: &str) -> bool {
// let root = self.filter_data_context.memory.root();
// FlatFilterSetView::new(root.simple_class_rules()).contains(class)
// }

/// Test-only constructor that builds a cache directly from a list of cosmetic filters.
///
/// The rules are first collected by `CosmeticFilterCacheBuilder::from_rules`, a flatbuffer is
/// produced with an empty network filter list, and the resulting context is combined with the
/// builder's remaining hostname-specific rules.
#[cfg(test)]
pub fn from_rules(rules: Vec<CosmeticFilter>) -> Self {
    use crate::filters::fb_builder::FlatBufferBuilder;
    use crate::filters::fb_network::FilterDataContext;

    let mut cache_builder = CosmeticFilterCacheBuilder::from_rules(rules);

    // The flatbuffer must be built before the non-proto fields are taken out of the builder,
    // because `make_flatbuffer` receives the builder by mutable reference.
    let flatbuffer = FlatBufferBuilder::make_flatbuffer(Vec::new(), &mut cache_builder, true);
    let context = FilterDataContext::new(flatbuffer);

    let not_proto_fields = cache_builder.take_non_proto_fields();
    Self::from_context(context, not_proto_fields)
}

/// Generic class/id rules are by far the most common type of cosmetic filtering rule, and they
/// apply to all sites. Rather than injecting all of these rules onto every page, which would
Expand Down Expand Up @@ -191,35 +218,41 @@ impl CosmeticFilterCache {
) -> Vec<String> {
let mut selectors = vec![];

let root = self.filter_data_context.memory.root();
let simple_class_rules = FlatFilterSetView::new(root.simple_class_rules());
let simple_id_rules = FlatFilterSetView::new(root.simple_id_rules());
let complex_class_rules = FlatMapView::new(
root.complex_class_rules_index(),
root.complex_class_rules_values(),
);
let complex_id_rules = FlatMapView::new(
root.complex_id_rules_index(),
root.complex_id_rules_values(),
);

classes.into_iter().for_each(|class| {
let class = class.as_ref();
if self.simple_class_rules.contains(class)
&& !exceptions.contains(&format!(".{}", class))
{
if simple_class_rules.contains(class) && !exceptions.contains(&format!(".{}", class)) {
selectors.push(format!(".{}", class));
}
if let Some(bucket) = self.complex_class_rules.get(class) {
selectors.extend(
bucket
.iter()
.filter(|sel| !exceptions.contains(*sel))
.map(|s| s.to_owned()),
);
}
let bucket = complex_class_rules.get(class);
selectors.extend(
bucket
.filter(|(_, sel)| !exceptions.contains(*sel))
.map(|(_, s)| s.to_string()),
);
});
ids.into_iter().for_each(|id| {
let id = id.as_ref();
if self.simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) {
if simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) {
selectors.push(format!("#{}", id));
}
if let Some(bucket) = self.complex_id_rules.get(id) {
selectors.extend(
bucket
.iter()
.filter(|sel| !exceptions.contains(*sel))
.map(|s| s.to_owned()),
);
}
let bucket = complex_id_rules.get(id);
selectors.extend(
bucket
.filter(|(_, sel)| !exceptions.contains(*sel))
.map(|(_, s)| s.to_string()),
);
});

selectors
Expand Down Expand Up @@ -334,11 +367,17 @@ impl CosmeticFilterCache {
let hide_selectors = if generichide {
specific_hide_selectors
} else {
let mut hide_selectors = self
.misc_generic_selectors
.difference(&exceptions)
.cloned()
.collect::<HashSet<_>>();
let root = self.filter_data_context.memory.root();
let misc_generic_selectors_vector = root.misc_generic_selectors();

// TODO: check performance of this
let mut hide_selectors = HashSet::new();
for i in 0..misc_generic_selectors_vector.len() {
let selector = misc_generic_selectors_vector.get(i);
if !exceptions.contains(selector) {
hide_selectors.insert(selector.to_string());
}
}
specific_hide_selectors.into_iter().for_each(|sel| {
hide_selectors.insert(sel);
});
Expand Down
8 changes: 4 additions & 4 deletions src/data_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod storage;
pub(crate) mod utils;

use crate::cosmetic_filter_cache::CosmeticFilterCache;
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
use crate::filters::fb_network::{FilterDataContext, FilterDataContextRef};
use crate::network_filter_list::NetworkFilterListParsingError;

/// Newer formats start with this magic byte sequence.
Expand Down Expand Up @@ -62,16 +62,16 @@ impl From<NetworkFilterListParsingError> for DeserializationError {
}

pub(crate) fn serialize_engine(
flatbuffer_memory: &VerifiedFlatbufferMemory,
context: &FilterDataContext,
cfc: &CosmeticFilterCache,
) -> Result<Vec<u8>, SerializationError> {
let serialize_format = storage::SerializeFormat::from((flatbuffer_memory, cfc));
let serialize_format = storage::SerializeFormat::from((context, cfc));
serialize_format.serialize()
}

pub(crate) fn deserialize_engine(
serialized: &[u8],
) -> Result<(VerifiedFlatbufferMemory, CosmeticFilterCache), DeserializationError> {
) -> Result<(FilterDataContextRef, CosmeticFilterCache), DeserializationError> {
let deserialize_format = storage::DeserializeFormat::deserialize(serialized)?;
deserialize_format.try_into()
}
Expand Down
63 changes: 18 additions & 45 deletions src/data_format/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
//!
//! Any new fields should be added to the _end_ of both `SerializeFormat` and `DeserializeFormat`.

use std::collections::{HashMap, HashSet};
use std::collections::HashMap;

use rmp_serde as rmps;
use serde::{Deserialize, Serialize};

use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb, ProceduralOrActionFilter};
use crate::cosmetic_filter_cache::{
CosmeticFilterCache, CosmeticFilterNotProtoFields, HostnameRuleDb, ProceduralOrActionFilter,
};
use crate::filters::fb_network::{FilterDataContext, FilterDataContextRef};
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
use crate::utils::Hash;

use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization};
use super::utils::stabilize_hashmap_serialization;
use super::{DeserializationError, SerializationError};

/// Each variant describes a single rule that is specific to a particular hostname.
Expand Down Expand Up @@ -188,20 +191,8 @@ pub(crate) struct SerializeFormat<'a> {

resources: LegacyRedirectResourceStorage,

#[serde(serialize_with = "stabilize_hashset_serialization")]
simple_class_rules: &'a HashSet<String>,
#[serde(serialize_with = "stabilize_hashset_serialization")]
simple_id_rules: &'a HashSet<String>,
#[serde(serialize_with = "stabilize_hashmap_serialization")]
complex_class_rules: &'a HashMap<String, Vec<String>>,
#[serde(serialize_with = "stabilize_hashmap_serialization")]
complex_id_rules: &'a HashMap<String, Vec<String>>,

specific_rules: LegacyHostnameRuleDb,

#[serde(serialize_with = "stabilize_hashset_serialization")]
misc_generic_selectors: &'a HashSet<String>,

scriptlets: LegacyScriptletResourceStorage,

#[serde(serialize_with = "stabilize_hashmap_serialization")]
Expand All @@ -227,15 +218,8 @@ pub(crate) struct DeserializeFormat {

_resources: LegacyRedirectResourceStorage,

simple_class_rules: HashSet<String>,
simple_id_rules: HashSet<String>,
complex_class_rules: HashMap<String, Vec<String>>,
complex_id_rules: HashMap<String, Vec<String>>,

specific_rules: LegacyHostnameRuleDb,

misc_generic_selectors: HashSet<String>,

_scriptlets: LegacyScriptletResourceStorage,

#[serde(default)]
Expand All @@ -252,23 +236,16 @@ impl DeserializeFormat {
}
}

impl<'a> From<(&'a VerifiedFlatbufferMemory, &'a CosmeticFilterCache)> for SerializeFormat<'a> {
fn from(v: (&'a VerifiedFlatbufferMemory, &'a CosmeticFilterCache)) -> Self {
let (memory, cfc) = v;
impl<'a> From<(&'a FilterDataContext, &'a CosmeticFilterCache)> for SerializeFormat<'a> {
fn from(v: (&'a FilterDataContext, &'a CosmeticFilterCache)) -> Self {
let (context, cfc) = v;
Self {
flatbuffer_memory: memory.data().to_vec(),
flatbuffer_memory: context.memory.data().to_vec(),

resources: LegacyRedirectResourceStorage::default(),

simple_class_rules: &cfc.simple_class_rules,
simple_id_rules: &cfc.simple_id_rules,
complex_class_rules: &cfc.complex_class_rules,
complex_id_rules: &cfc.complex_id_rules,

specific_rules: (&cfc.specific_rules).into(),

misc_generic_selectors: &cfc.misc_generic_selectors,

scriptlets: LegacyScriptletResourceStorage::default(),

procedural_action: &cfc.specific_rules.procedural_action.0,
Expand All @@ -277,9 +254,10 @@ impl<'a> From<(&'a VerifiedFlatbufferMemory, &'a CosmeticFilterCache)> for Seria
}
}

impl TryFrom<DeserializeFormat> for (VerifiedFlatbufferMemory, CosmeticFilterCache) {
impl TryFrom<DeserializeFormat> for (FilterDataContextRef, CosmeticFilterCache) {
fn try_from(v: DeserializeFormat) -> Result<Self, Self::Error> {
use crate::cosmetic_filter_cache::HostnameFilterBin;
use crate::filters::fb_network::FilterDataContext;

let mut specific_rules: HostnameRuleDb = v.specific_rules.into();
specific_rules.procedural_action = HostnameFilterBin(v.procedural_action);
Expand All @@ -289,19 +267,14 @@ impl TryFrom<DeserializeFormat> for (VerifiedFlatbufferMemory, CosmeticFilterCac
let memory = VerifiedFlatbufferMemory::from_raw(v.flatbuffer_memory)
.map_err(DeserializationError::FlatBufferParsingError)?;

Ok((
memory,
CosmeticFilterCache {
simple_class_rules: v.simple_class_rules,
simple_id_rules: v.simple_id_rules,
complex_class_rules: v.complex_class_rules,
complex_id_rules: v.complex_id_rules,
let filter_data_context = FilterDataContext::new(memory);

specific_rules,
let cosmetic_cache = CosmeticFilterCache::from_context(
filter_data_context.clone(),
CosmeticFilterNotProtoFields { specific_rules },
);

misc_generic_selectors: v.misc_generic_selectors,
},
))
Ok((filter_data_context, cosmetic_cache))
}

type Error = DeserializationError;
Expand Down
13 changes: 1 addition & 12 deletions src/data_format/utils.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,10 @@
//! Common utilities associated with serialization and deserialization of the `Engine` data into
//! binary formats.

use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::collections::{BTreeMap, HashMap};

use serde::{Serialize, Serializer};

/// Forces a `HashSet` to be serialized with a stable ordering by temporarily representing it as a
/// `BTreeSet`.
pub fn stabilize_hashset_serialization<S, V>(set: &HashSet<V>, s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
V: Ord + serde::Serialize,
{
let stabilized: BTreeSet<&V> = set.iter().collect();
stabilized.serialize(s)
}

/// Forces a `HashMap` to be serialized with a stable ordering by temporarily representing it as a
/// `BTreeMap`.
pub fn stabilize_hashmap_serialization<S, K, V>(
Expand Down
Loading
Loading