mirror of
https://github.com/Myzel394/tifsep.git
synced 2025-06-18 23:45:27 +02:00
fix: Adding better support for one engine
This commit is contained in:
parent
78a87f207c
commit
f173631404
15
Cargo.lock
generated
15
Cargo.lock
generated
@ -1070,10 +1070,12 @@ dependencies = [
|
|||||||
"system-configuration",
|
"system-configuration",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-native-tls",
|
"tokio-native-tls",
|
||||||
|
"tokio-util",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
"url",
|
"url",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
"wasm-bindgen-futures",
|
"wasm-bindgen-futures",
|
||||||
|
"wasm-streams",
|
||||||
"web-sys",
|
"web-sys",
|
||||||
"winreg",
|
"winreg",
|
||||||
]
|
]
|
||||||
@ -1860,6 +1862,19 @@ version = "0.2.90"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b"
|
checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-streams"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7"
|
||||||
|
dependencies = [
|
||||||
|
"futures-util",
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-futures",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "web-sys"
|
name = "web-sys"
|
||||||
version = "0.3.67"
|
version = "0.3.67"
|
||||||
|
@ -11,7 +11,7 @@ futures = "0.3.30"
|
|||||||
lazy-regex = "3.1.0"
|
lazy-regex = "3.1.0"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
regex = "1.10.3"
|
regex = "1.10.3"
|
||||||
reqwest = "0.11.23"
|
reqwest = { version = "0.11.23", features = ["stream"] }
|
||||||
rocket = "0.5.0"
|
rocket = "0.5.0"
|
||||||
rustls = "0.22.2"
|
rustls = "0.22.2"
|
||||||
tokio = {version = "1.35.1", features = ["full"]}
|
tokio = {version = "1.35.1", features = ["full"]}
|
||||||
|
@ -1,17 +1,16 @@
|
|||||||
// Search engine parser for DuckDuckGo
|
// Search engine parser for DuckDuckGo
|
||||||
pub mod duckduckgo {
|
pub mod duckduckgo {
|
||||||
use std::{
|
use std::{
|
||||||
cmp::min,
|
|
||||||
collections::VecDeque,
|
collections::VecDeque,
|
||||||
pin::Pin,
|
pin::{pin, Pin},
|
||||||
str::Bytes,
|
|
||||||
task::{Context, Poll},
|
task::{Context, Poll},
|
||||||
};
|
};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use futures::Stream;
|
use futures::{FutureExt, Stream, StreamExt};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
use rocket::http::hyper::body::Bytes;
|
||||||
use urlencoding::decode;
|
use urlencoding::decode;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -35,32 +34,7 @@ pub mod duckduckgo {
|
|||||||
results_started: bool,
|
results_started: bool,
|
||||||
previous_block: String,
|
previous_block: String,
|
||||||
// Holds all results until consumed by iterator
|
// Holds all results until consumed by iterator
|
||||||
pub results: VecDeque<SearchResult>,
|
pub results: Vec<SearchResult>,
|
||||||
}
|
|
||||||
|
|
||||||
impl DuckDuckGo {
|
|
||||||
fn slice_remaining_block(&mut self, start_position: &usize) {
|
|
||||||
let previous_block_bytes = self.previous_block.as_bytes().to_vec();
|
|
||||||
let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
|
|
||||||
let remaining_text = String::from_utf8(remaining_bytes).unwrap();
|
|
||||||
|
|
||||||
self.previous_block.clear();
|
|
||||||
self.previous_block.push_str(&remaining_text);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
callback: Box::new(|_: SearchResult| {}),
|
|
||||||
results_started: false,
|
|
||||||
previous_block: String::new(),
|
|
||||||
results: VecDeque::new(),
|
|
||||||
completed: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_callback(&mut self, callback: CallbackType) {
|
|
||||||
self.callback = callback;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// impl Stream for DuckDuckGo {
|
// impl Stream for DuckDuckGo {
|
||||||
@ -100,33 +74,80 @@ pub mod duckduckgo {
|
|||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
pub struct DuckDuckGoSearchStream<'a> {
|
||||||
|
pub ddg: &'a DuckDuckGo,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> DuckDuckGoSearchStream<'a> {
|
||||||
|
pub fn new(ddg: &'a DuckDuckGo) -> Self {
|
||||||
|
Self { ddg }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl EngineBase for DuckDuckGo {
|
impl Stream for DuckDuckGoSearchStream<'_> {
|
||||||
fn search(&mut self, query: &str) {
|
type Item = SearchResult;
|
||||||
dbg!("searching duckduckgo");
|
|
||||||
|
|
||||||
let client = Client::new("https://html.duckduckgo.com/html/");
|
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||||
|
Poll::Pending
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let packets = client.request(&"POST").unwrap();
|
impl DuckDuckGo {
|
||||||
|
pub fn get_stream<'a>(&'a self) -> impl Stream<Item = SearchResult> + 'a {
|
||||||
|
DuckDuckGoSearchStream { ddg: self }
|
||||||
|
}
|
||||||
|
|
||||||
for ii in (0..packets.len()).step_by(PACKET_SIZE) {
|
fn slice_remaining_block(&mut self, start_position: &usize) {
|
||||||
let end_range = min(packets.len(), ii + PACKET_SIZE);
|
let previous_block_bytes = self.previous_block.as_bytes().to_vec();
|
||||||
|
let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
|
||||||
|
let remaining_text = String::from_utf8(remaining_bytes).unwrap();
|
||||||
|
|
||||||
let slice = &packets[ii..end_range];
|
self.previous_block.clear();
|
||||||
self.parse_packet(slice.iter());
|
self.previous_block.push_str(&remaining_text);
|
||||||
|
}
|
||||||
|
|
||||||
// Call callback, there is probably a better way to do this
|
pub fn new() -> Self {
|
||||||
// while self.results.len() > 0 {
|
Self {
|
||||||
// let result = self.results.pop_front().unwrap();
|
callback: Box::new(|_: SearchResult| {}),
|
||||||
//
|
results_started: false,
|
||||||
// (self.callback)(result);
|
previous_block: String::new(),
|
||||||
// }
|
results: vec![],
|
||||||
|
completed: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_callback(&mut self, callback: CallbackType) {
|
||||||
|
self.callback = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn search(&mut self, query: &str) {
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
|
||||||
|
let mut stream = client
|
||||||
|
.post("https://html.duckduckgo.com/html/")
|
||||||
|
.header("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
.body("q=duck")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.bytes_stream();
|
||||||
|
|
||||||
|
while let Some(item) = stream.next().await {
|
||||||
|
let packet = item.unwrap();
|
||||||
|
|
||||||
|
if let Some(result) = self.parse_packet(packet.iter()) {
|
||||||
|
self.results.push(result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.completed = true;
|
self.completed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) {
|
fn parse_packet<'a>(
|
||||||
|
&mut self,
|
||||||
|
packet: impl Iterator<Item = &'a u8>,
|
||||||
|
) -> Option<SearchResult> {
|
||||||
let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
|
let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
|
||||||
let raw_text = String::from_utf8_lossy(&bytes);
|
let raw_text = String::from_utf8_lossy(&bytes);
|
||||||
let text = STRIP.replace_all(&raw_text, " ");
|
let text = STRIP.replace_all(&raw_text, " ");
|
||||||
@ -159,15 +180,15 @@ pub mod duckduckgo {
|
|||||||
let end_position = captures.name("end").unwrap().end();
|
let end_position = captures.name("end").unwrap().end();
|
||||||
self.slice_remaining_block(&end_position);
|
self.slice_remaining_block(&end_position);
|
||||||
|
|
||||||
// (self.callback)(result);
|
return Some(result);
|
||||||
|
|
||||||
self.results.push_back(result);
|
|
||||||
}
|
}
|
||||||
None => {}
|
None => {}
|
||||||
}
|
}
|
||||||
} else if RESULTS_START.is_match(&text) {
|
} else if RESULTS_START.is_match(&text) {
|
||||||
self.results_started = true;
|
self.results_started = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,6 @@ pub mod engine_base {
|
|||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait EngineBase {
|
pub trait EngineBase {
|
||||||
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>);
|
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>);
|
||||||
fn search(&mut self, query: &str);
|
async fn search(&mut self, query: &str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
74
src/main.rs
74
src/main.rs
@ -26,13 +26,10 @@ use std::{
|
|||||||
sync::{Arc, RwLock},
|
sync::{Arc, RwLock},
|
||||||
};
|
};
|
||||||
|
|
||||||
use rocket::response::stream::{ReaderStream, TextStream};
|
use futures::lock::Mutex;
|
||||||
|
use rocket::response::stream::TextStream;
|
||||||
|
|
||||||
use engines::{
|
use engines::duckduckgo::duckduckgo::DuckDuckGo;
|
||||||
duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase,
|
|
||||||
engine_base::engine_base::SearchResult,
|
|
||||||
};
|
|
||||||
use tokio::sync::Mutex;
|
|
||||||
|
|
||||||
pub mod client;
|
pub mod client;
|
||||||
pub mod engines;
|
pub mod engines;
|
||||||
@ -66,55 +63,52 @@ fn search_get() -> &'static str {
|
|||||||
async fn hello<'a>(query: &str) -> TextStream![String] {
|
async fn hello<'a>(query: &str) -> TextStream![String] {
|
||||||
let query_box = Box::new(query.to_string());
|
let query_box = Box::new(query.to_string());
|
||||||
|
|
||||||
|
let ddg_ref = Arc::new(Mutex::new(DuckDuckGo::new()));
|
||||||
|
let ddg_writer_ref = ddg_ref.clone();
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut ddg = ddg_writer_ref.lock().await;
|
||||||
|
ddg.search(&query_box).await;
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut current_index = 0;
|
||||||
|
|
||||||
TextStream! {
|
TextStream! {
|
||||||
let start = "<html><body>".to_string();
|
let start = "<DOCTYPE!html><html><body>".to_string();
|
||||||
yield start;
|
yield start;
|
||||||
|
|
||||||
let ddg_tv = Arc::new(
|
|
||||||
Mutex::new(
|
|
||||||
DuckDuckGo::new(),
|
|
||||||
),
|
|
||||||
);
|
|
||||||
let ddg_tv_clone = ddg_tv.clone();
|
|
||||||
|
|
||||||
tokio::spawn(async move {
|
|
||||||
ddg_tv_clone.lock().await.search(&query_box);
|
|
||||||
});
|
|
||||||
|
|
||||||
let mut last_position: i32 = -1;
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let ddg = ddg_tv.lock().await;
|
let ddg = ddg_ref.lock().await;
|
||||||
let len = ddg.results.len() as i32;
|
|
||||||
|
|
||||||
if ddg.completed && last_position == len {
|
let len = ddg.results.len();
|
||||||
break;
|
|
||||||
|
if len == 0 {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if last_position < (len - 1) {
|
if ddg.completed && current_index == len - 1 {
|
||||||
for i in max(0, last_position)..=(len - 1) {
|
break
|
||||||
match ddg.results.get(i as usize).clone() {
|
|
||||||
Some(result) => {
|
|
||||||
let html = format!("<br><h2>{}</h2><p>{}</p>", result.title, result.description);
|
|
||||||
yield html;
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
last_position = len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for ii in (current_index + 1)..len {
|
||||||
|
let result = ddg.results.get(ii);
|
||||||
|
|
||||||
|
dbg!(&result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// [1] -> 0
|
||||||
|
// 1 -> [1]
|
||||||
|
current_index = len - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let end = "</body></html>".to_string();
|
let end = "</body></html>".to_string();
|
||||||
yield end;
|
|
||||||
|
yield end
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[launch]
|
#[launch]
|
||||||
fn rocket() -> _ {
|
async fn rocket() -> _ {
|
||||||
rocket::build()
|
rocket::build()
|
||||||
.mount("/", routes![index])
|
.mount("/", routes![index])
|
||||||
.mount("/", routes![hello])
|
.mount("/", routes![hello])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user