From c19371d079b57041a8cd027a52e538656c165bdc Mon Sep 17 00:00:00 2001
From: Myzel394 <50424412+Myzel394@users.noreply.github.com>
Date: Wed, 21 Feb 2024 09:41:29 +0100
Subject: [PATCH] fix: cleanup; improve code quality
---
src/engines/brave.rs | 58 ++++-------------------------
src/engines/engine_base.rs | 75 +++++++++++++++++++++++++++++++++++---
src/main.rs | 1 +
3 files changed, 77 insertions(+), 57 deletions(-)
diff --git a/src/engines/brave.rs b/src/engines/brave.rs
index f33b254..cb594eb 100644
--- a/src/engines/brave.rs
+++ b/src/engines/brave.rs
@@ -1,27 +1,19 @@
// Search engine parser for Brave Search
// This uses the clearnet, unlocalized version of the search engine.
pub mod brave {
- use std::sync::Arc;
-
- use futures::lock::Mutex;
use lazy_static::lazy_static;
use regex::Regex;
use tokio::sync::mpsc::Sender;
- use urlencoding::decode;
use crate::{
- engines::engine_base::engine_base::{
- EngineBase, EnginePositions, ResultsCollector, SearchEngine, SearchResult,
- },
+ engines::engine_base::engine_base::{EngineBase, EnginePositions, SearchResult},
helpers::helpers::build_default_client,
- utils::utils::decode_html_text,
};
lazy_static! {
static ref RESULTS_START: Regex = Regex::new(r#"
(?P.+?).+?(?P.+?)
"#).unwrap();
static ref STRIP: Regex = Regex::new(r"\s+").unwrap();
- static ref STRIP_HTML_TAGS: Regex = Regex::new(r#"<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>"#).unwrap();
}
#[derive(Clone, Debug)]
@@ -31,49 +23,13 @@ pub mod brave {
impl EngineBase for Brave {
fn parse_next<'a>(&mut self) -> Option {
- if self.positions.started {
- if let Some(capture) =
- SINGLE_RESULT.captures(&self.positions.previous_block.to_owned())
- {
- let title = decode(capture.name("title").unwrap().as_str())
- .unwrap()
- .into_owned();
- let description_raw =
- decode_html_text(capture.name("description").unwrap().as_str()).unwrap();
- let description = STRIP_HTML_TAGS
- .replace_all(&description_raw, "")
- .into_owned();
- let url = decode(capture.name("url").unwrap().as_str())
- .unwrap()
- .into_owned();
-
- let result = SearchResult {
- title,
- description,
- url,
- engine: SearchEngine::DuckDuckGo,
- };
-
- let end_position = capture.get(0).unwrap().end();
- self.positions.slice_remaining_block(&end_position);
-
- return Some(result);
- }
- }
-
- None
+ self.positions
+ .handle_block_using_default_method(&SINGLE_RESULT)
}
fn push_packet<'a>(&mut self, packet: impl Iterator- ) {
- let bytes: Vec = packet.map(|bit| *bit).collect();
- let raw_text = String::from_utf8_lossy(&bytes);
- let text = STRIP.replace_all(&raw_text, " ");
-
- if self.positions.started {
- self.positions.previous_block.push_str(&text);
- } else {
- self.positions.started = RESULTS_START.is_match(&text);
- }
+ self.positions
+ .handle_start_check_using_default_method(&RESULTS_START, packet)
}
}
@@ -84,13 +40,13 @@ pub mod brave {
}
}
- pub async fn search(&mut self, query: &str, tx: Sender) {
+ pub async fn search(&mut self, query: &str, tx: Sender) -> Result<(), ()> {
let client = build_default_client();
let request = client
.get(format!("https://search.brave.com/search?q={}", query))
.send();
- self.handle_request(request, tx).await;
+ self.handle_request(request, tx).await
}
}
}
diff --git a/src/engines/engine_base.rs b/src/engines/engine_base.rs
index 23e1eaa..792efc0 100644
--- a/src/engines/engine_base.rs
+++ b/src/engines/engine_base.rs
@@ -6,17 +6,22 @@ pub mod engine_base {
use regex::Regex;
use reqwest::{Error, Response};
use tokio::sync::mpsc::Sender;
+ use urlencoding::decode;
+
+ use crate::utils::utils::decode_html_text;
lazy_static! {
- static ref STRIP: Regex = Regex::new(r"\s+").unwrap();
+ static ref STRIP: Regex = Regex::new(r"[\s\n]+").unwrap();
+ static ref STRIP_HTML_TAGS: Regex =
+ Regex::new(r#"<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>"#).unwrap();
}
- #[derive(Clone, Copy, Debug, Hash)]
+ #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum SearchEngine {
DuckDuckGo,
}
- #[derive(Clone, Debug, Hash)]
+ #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct SearchResult {
pub title: String,
pub url: String,
@@ -52,7 +57,7 @@ pub mod engine_base {
&mut self,
request: impl Future