fix: Adding better support for one engine

This commit is contained in:
Myzel394 2024-02-08 21:59:01 +01:00
parent 78a87f207c
commit f173631404
No known key found for this signature in database
GPG Key ID: 79CC92F37B3E1A2B
5 changed files with 121 additions and 91 deletions

15
Cargo.lock generated
View File

@ -1070,10 +1070,12 @@ dependencies = [
"system-configuration", "system-configuration",
"tokio", "tokio",
"tokio-native-tls", "tokio-native-tls",
"tokio-util",
"tower-service", "tower-service",
"url", "url",
"wasm-bindgen", "wasm-bindgen",
"wasm-bindgen-futures", "wasm-bindgen-futures",
"wasm-streams",
"web-sys", "web-sys",
"winreg", "winreg",
] ]
@ -1860,6 +1862,19 @@ version = "0.2.90"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b"
[[package]]
name = "wasm-streams"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]] [[package]]
name = "web-sys" name = "web-sys"
version = "0.3.67" version = "0.3.67"

View File

@ -11,7 +11,7 @@ futures = "0.3.30"
lazy-regex = "3.1.0" lazy-regex = "3.1.0"
lazy_static = "1.4.0" lazy_static = "1.4.0"
regex = "1.10.3" regex = "1.10.3"
reqwest = "0.11.23" reqwest = { version = "0.11.23", features = ["stream"] }
rocket = "0.5.0" rocket = "0.5.0"
rustls = "0.22.2" rustls = "0.22.2"
tokio = {version = "1.35.1", features = ["full"]} tokio = {version = "1.35.1", features = ["full"]}

View File

@ -1,17 +1,16 @@
// Search engine parser for DuckDuckGo // Search engine parser for DuckDuckGo
pub mod duckduckgo { pub mod duckduckgo {
use std::{ use std::{
cmp::min,
collections::VecDeque, collections::VecDeque,
pin::Pin, pin::{pin, Pin},
str::Bytes,
task::{Context, Poll}, task::{Context, Poll},
}; };
use async_trait::async_trait; use async_trait::async_trait;
use futures::Stream; use futures::{FutureExt, Stream, StreamExt};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use regex::Regex; use regex::Regex;
use rocket::http::hyper::body::Bytes;
use urlencoding::decode; use urlencoding::decode;
use crate::{ use crate::{
@ -35,32 +34,7 @@ pub mod duckduckgo {
results_started: bool, results_started: bool,
previous_block: String, previous_block: String,
// Holds all results until consumed by iterator // Holds all results until consumed by iterator
pub results: VecDeque<SearchResult>, pub results: Vec<SearchResult>,
}
impl DuckDuckGo {
fn slice_remaining_block(&mut self, start_position: &usize) {
let previous_block_bytes = self.previous_block.as_bytes().to_vec();
let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
let remaining_text = String::from_utf8(remaining_bytes).unwrap();
self.previous_block.clear();
self.previous_block.push_str(&remaining_text);
}
pub fn new() -> Self {
Self {
callback: Box::new(|_: SearchResult| {}),
results_started: false,
previous_block: String::new(),
results: VecDeque::new(),
completed: false,
}
}
pub fn set_callback(&mut self, callback: CallbackType) {
self.callback = callback;
}
} }
// impl Stream for DuckDuckGo { // impl Stream for DuckDuckGo {
@ -100,33 +74,80 @@ pub mod duckduckgo {
// } // }
// } // }
pub struct DuckDuckGoSearchStream<'a> {
pub ddg: &'a DuckDuckGo,
}
impl<'a> DuckDuckGoSearchStream<'a> {
pub fn new(ddg: &'a DuckDuckGo) -> Self {
Self { ddg }
}
}
#[async_trait] #[async_trait]
impl EngineBase for DuckDuckGo { impl Stream for DuckDuckGoSearchStream<'_> {
fn search(&mut self, query: &str) { type Item = SearchResult;
dbg!("searching duckduckgo");
let client = Client::new("https://html.duckduckgo.com/html/"); fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
Poll::Pending
}
}
let packets = client.request(&"POST").unwrap(); impl DuckDuckGo {
pub fn get_stream<'a>(&'a self) -> impl Stream<Item = SearchResult> + 'a {
DuckDuckGoSearchStream { ddg: self }
}
for ii in (0..packets.len()).step_by(PACKET_SIZE) { fn slice_remaining_block(&mut self, start_position: &usize) {
let end_range = min(packets.len(), ii + PACKET_SIZE); let previous_block_bytes = self.previous_block.as_bytes().to_vec();
let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
let remaining_text = String::from_utf8(remaining_bytes).unwrap();
let slice = &packets[ii..end_range]; self.previous_block.clear();
self.parse_packet(slice.iter()); self.previous_block.push_str(&remaining_text);
}
// Call callback, there is probably a better way to do this pub fn new() -> Self {
// while self.results.len() > 0 { Self {
// let result = self.results.pop_front().unwrap(); callback: Box::new(|_: SearchResult| {}),
// results_started: false,
// (self.callback)(result); previous_block: String::new(),
// } results: vec![],
completed: false,
}
}
pub fn set_callback(&mut self, callback: CallbackType) {
self.callback = callback;
}
pub async fn search(&mut self, query: &str) {
let client = reqwest::Client::new();
let mut stream = client
.post("https://html.duckduckgo.com/html/")
.header("Content-Type", "application/x-www-form-urlencoded")
.body("q=duck")
.send()
.await
.unwrap()
.bytes_stream();
while let Some(item) = stream.next().await {
let packet = item.unwrap();
if let Some(result) = self.parse_packet(packet.iter()) {
self.results.push(result);
}
} }
self.completed = true; self.completed = true;
} }
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) { fn parse_packet<'a>(
&mut self,
packet: impl Iterator<Item = &'a u8>,
) -> Option<SearchResult> {
let bytes: Vec<u8> = packet.map(|bit| *bit).collect(); let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
let raw_text = String::from_utf8_lossy(&bytes); let raw_text = String::from_utf8_lossy(&bytes);
let text = STRIP.replace_all(&raw_text, " "); let text = STRIP.replace_all(&raw_text, " ");
@ -159,15 +180,15 @@ pub mod duckduckgo {
let end_position = captures.name("end").unwrap().end(); let end_position = captures.name("end").unwrap().end();
self.slice_remaining_block(&end_position); self.slice_remaining_block(&end_position);
// (self.callback)(result); return Some(result);
self.results.push_back(result);
} }
None => {} None => {}
} }
} else if RESULTS_START.is_match(&text) { } else if RESULTS_START.is_match(&text) {
self.results_started = true; self.results_started = true;
} }
None
} }
} }
} }

View File

@ -17,6 +17,6 @@ pub mod engine_base {
#[async_trait] #[async_trait]
pub trait EngineBase { pub trait EngineBase {
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>); fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>);
fn search(&mut self, query: &str); async fn search(&mut self, query: &str);
} }
} }

View File

@ -26,13 +26,10 @@ use std::{
sync::{Arc, RwLock}, sync::{Arc, RwLock},
}; };
use rocket::response::stream::{ReaderStream, TextStream}; use futures::lock::Mutex;
use rocket::response::stream::TextStream;
use engines::{ use engines::duckduckgo::duckduckgo::DuckDuckGo;
duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase,
engine_base::engine_base::SearchResult,
};
use tokio::sync::Mutex;
pub mod client; pub mod client;
pub mod engines; pub mod engines;
@ -66,55 +63,52 @@ fn search_get() -> &'static str {
async fn hello<'a>(query: &str) -> TextStream![String] { async fn hello<'a>(query: &str) -> TextStream![String] {
let query_box = Box::new(query.to_string()); let query_box = Box::new(query.to_string());
TextStream! { let ddg_ref = Arc::new(Mutex::new(DuckDuckGo::new()));
let start = "<html><body>".to_string(); let ddg_writer_ref = ddg_ref.clone();
yield start;
let ddg_tv = Arc::new(
Mutex::new(
DuckDuckGo::new(),
),
);
let ddg_tv_clone = ddg_tv.clone();
tokio::spawn(async move { tokio::spawn(async move {
ddg_tv_clone.lock().await.search(&query_box); let mut ddg = ddg_writer_ref.lock().await;
ddg.search(&query_box).await;
}); });
let mut last_position: i32 = -1; let mut current_index = 0;
TextStream! {
let start = "<DOCTYPE!html><html><body>".to_string();
yield start;
loop { loop {
let ddg = ddg_tv.lock().await; let ddg = ddg_ref.lock().await;
let len = ddg.results.len() as i32;
if ddg.completed && last_position == len { let len = ddg.results.len();
break;
if len == 0 {
continue
} }
if last_position < (len - 1) { if ddg.completed && current_index == len - 1 {
for i in max(0, last_position)..=(len - 1) { break
match ddg.results.get(i as usize).clone() {
Some(result) => {
let html = format!("<br><h2>{}</h2><p>{}</p>", result.title, result.description);
yield html;
}
None => {
break;
}
}
} }
last_position = len; for ii in (current_index + 1)..len {
let result = ddg.results.get(ii);
dbg!(&result);
} }
// [1] -> 0
// 1 -> [1]
current_index = len - 1;
} }
let end = "</body></html>".to_string(); let end = "</body></html>".to_string();
yield end;
yield end
} }
} }
#[launch] #[launch]
fn rocket() -> _ { async fn rocket() -> _ {
rocket::build() rocket::build()
.mount("/", routes![index]) .mount("/", routes![index])
.mount("/", routes![hello]) .mount("/", routes![hello])