feat: Add first real working wip for brave; With real real time processing

This commit is contained in:
Myzel394 2024-02-18 21:50:51 +01:00
parent e3e22458b0
commit e1bd09df44
No known key found for this signature in database
GPG Key ID: DEC4AAB876F73185
11 changed files with 2884 additions and 129 deletions

2
.gitignore vendored
View File

@ -1,2 +1,4 @@
/target
build
./tailwindcss/node_modules/*

218
Cargo.lock generated
View File

@ -80,6 +80,32 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "aws-lc-rs"
version = "1.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df33e4a55b03f8780ba55041bc7be91a2a8ec8c03517b0379d2d6c96d2c30d95"
dependencies = [
"aws-lc-sys",
"mirai-annotations",
"paste",
"zeroize",
]
[[package]]
name = "aws-lc-sys"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5f269b176dc4aeb593910fa56ed6f956cde19542e496bb0bfc1ad9a6ce18815"
dependencies = [
"bindgen",
"cmake",
"dunce",
"fs_extra",
"libc",
"paste",
]
[[package]]
name = "backtrace"
version = "0.3.69"
@ -107,6 +133,29 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "383d29d513d8764dcdc42ea295d979eb99c3c9f00607b3692cf68a431f7dca72"
[[package]]
name = "bindgen"
version = "0.69.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
dependencies = [
"bitflags 2.4.2",
"cexpr",
"clang-sys",
"itertools",
"lazy_static",
"lazycell",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
"which",
]
[[package]]
name = "bitflags"
version = "1.3.2"
@ -146,12 +195,41 @@ dependencies = [
"libc",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "cmake"
version = "0.1.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
dependencies = [
"cc",
]
[[package]]
name = "cookie"
version = "0.18.0"
@ -221,6 +299,12 @@ dependencies = [
"syn",
]
[[package]]
name = "dunce"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b"
[[package]]
name = "either"
version = "1.9.0"
@ -302,6 +386,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "futures"
version = "0.3.30"
@ -458,6 +548,15 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
[[package]]
name = "home"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "http"
version = "0.2.11"
@ -573,6 +672,15 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "itertools"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.10"
@ -617,12 +725,28 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
[[package]]
name = "libloading"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161"
dependencies = [
"cfg-if",
"windows-sys 0.48.0",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.13"
@ -681,6 +805,12 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
@ -697,10 +827,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09"
dependencies = [
"libc",
"log",
"wasi",
"windows-sys 0.48.0",
]
[[package]]
name = "mirai-annotations"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1"
[[package]]
name = "multer"
version = "2.1.0"
@ -739,6 +876,16 @@ dependencies = [
"tempfile",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@ -847,6 +994,12 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "paste"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
[[package]]
name = "pear"
version = "0.2.8"
@ -906,6 +1059,16 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "prettyplease"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.78"
@ -1181,6 +1344,12 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.38.30"
@ -1196,12 +1365,11 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41"
version = "0.23.0-alpha.0"
dependencies = [
"aws-lc-rs",
"log",
"ring",
"once_cell",
"rustls-pki-types",
"rustls-webpki",
"subtle",
@ -1209,17 +1377,28 @@ dependencies = [
]
[[package]]
name = "rustls-pki-types"
version = "1.1.0"
name = "rustls-pemfile"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a"
checksum = "3c333bb734fcdedcea57de1602543590f545f127dc8b533324318fd492c5c70b"
dependencies = [
"base64",
"rustls-pki-types",
]
[[package]]
name = "rustls-pki-types"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "048a63e5b3ac996d78d402940b5fa47973d2d080c6c6fffa1d0f19c4445310b7"
[[package]]
name = "rustls-webpki"
version = "0.102.1"
version = "0.102.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef4ca26037c909dedb327b48c3327d0ba91d3dd3c4e05dad328f210ffb68e95b"
checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610"
dependencies = [
"aws-lc-rs",
"ring",
"rustls-pki-types",
"untrusted",
@ -1342,6 +1521,12 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
@ -1443,13 +1628,16 @@ name = "tcp_test"
version = "0.1.0"
dependencies = [
"async-trait",
"bytes",
"futures",
"lazy-regex",
"lazy_static",
"mio",
"regex",
"reqwest",
"rocket",
"rustls",
"rustls-pemfile",
"tokio",
"urlencoding",
"webpki-roots",
@ -1894,6 +2082,18 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "which"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
dependencies = [
"either",
"home",
"once_cell",
"rustix",
]
[[package]]
name = "winapi"
version = "0.3.9"

View File

@ -7,13 +7,16 @@ edition = "2021"
[dependencies]
async-trait = "0.1.77"
bytes = "1.5.0"
futures = "0.3.30"
lazy-regex = "3.1.0"
lazy_static = "1.4.0"
mio = { version = "0.8.10", features = ["net", "os-poll", ] }
regex = "1.10.3"
reqwest = { version = "0.11.23", features = ["stream"] }
rocket = "0.5.0"
rustls = "0.22.2"
rustls = { path = "../rustls/rustls", features = ["logging"] }
rustls-pemfile = "2"
tokio = {version = "1.35.1", features = ["full"]}
urlencoding = "2.1.3"
webpki-roots = "0.26.0"

View File

@ -1,2 +1,3 @@
pub mod brave;
pub mod duckduckgo;
pub mod engine_base;

109
src/engines/brave.rs Normal file
View File

@ -0,0 +1,109 @@
// Search engine parser for Brave Search
// This uses the clearnet, unlocalized version of the search engine.
pub mod brave {
use lazy_static::lazy_static;
use regex::Regex;
use urlencoding::decode;
use crate::{
engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult},
utils::utils::decode_html_text,
};
lazy_static! {
static ref RESULTS_START: Regex = Regex::new(r#"<body"#).unwrap();
static ref SINGLE_RESULT: Regex = Regex::new(r#"<div class="snippet svelte-.+?<a href=.(?P<url>.+?)".?((class="title svelt.+?">(?P<title>.+?))|(class="h.? svelte.+?<div class="title.+?>(?P<title2>.+?)<)).+?<!-- HTML_TAG_START -->(?P<description>.+?)<!-- HTML_TAG_END -->(?P<end>(.*?<\/div>( <\/div>){2})|(</div>.*?</div>.*?<div class="deep.*?data-sveltekit-reload.*?</div></div>.*?</div>))"#).unwrap();
static ref STRIP: Regex = Regex::new(r"\s+").unwrap();
static ref STRIP_HTML_TAGS: Regex = Regex::new(r#"<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>"#).unwrap();
}
pub struct Brave {
pub completed: bool,
results_started: bool,
pub previous_block: String,
pub results: Vec<SearchResult>,
}
impl Brave {
fn slice_remaining_block(&mut self, start_position: &usize) {
let previous_block_bytes = self.previous_block.as_bytes().to_vec();
let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
let remaining_text = String::from_utf8(remaining_bytes).unwrap();
self.previous_block.clear();
self.previous_block.push_str(&remaining_text);
}
pub fn new() -> Self {
Self {
results_started: false,
previous_block: String::new(),
results: vec![],
completed: false,
}
}
}
impl EngineBase for Brave {
fn add_result(&mut self, result: crate::engines::engine_base::engine_base::SearchResult) {
self.results.push(result);
}
fn parse_next<'a>(&mut self) -> Option<SearchResult> {
if self.results_started {
match SINGLE_RESULT.captures(&self.previous_block.to_owned()) {
Some(captures) => {
let title = decode(
captures
.name("title")
.unwrap_or_else(|| captures.name("title2").unwrap())
.as_str(),
)
.unwrap()
.into_owned();
let description_raw =
decode_html_text(captures.name("description").unwrap().as_str())
.unwrap();
let description = STRIP_HTML_TAGS
.replace_all(&description_raw, "")
.into_owned();
let url = decode(captures.name("url").unwrap().as_str())
.unwrap()
.into_owned();
let result = SearchResult {
title,
description,
url,
engine: SearchEngine::DuckDuckGo,
};
let end_position = captures.name("end").unwrap().end();
self.slice_remaining_block(&end_position);
return Some(result);
}
None => {}
}
}
None
}
fn push_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) {
let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
let raw_text = String::from_utf8_lossy(&bytes);
let text = STRIP.replace_all(&raw_text, " ");
if self.results_started {
self.previous_block.push_str(&text);
} else {
self.results_started = RESULTS_START.is_match(&text);
}
}
async fn search(&mut self, query: &str) {
todo!()
}
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,18 +1,17 @@
// Search engine parser for DuckDuckGo
pub mod duckduckgo {
use std::{
pin::{pin, Pin},
task::{Context, Poll},
io::{Read, Write},
net::TcpStream,
sync::Arc,
};
use async_trait::async_trait;
use futures::{AsyncRead, FutureExt, Stream, StreamExt};
use lazy_static::lazy_static;
use regex::Regex;
use rustls::RootCertStore;
use urlencoding::decode;
use crate::{
client::client::{Client, PACKET_SIZE},
engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult},
utils::utils::decode_html_text,
};
@ -72,107 +71,11 @@ pub mod duckduckgo {
// }
// }
pub struct DuckDuckGoSearchStream<'a> {
pub ddg: &'a DuckDuckGo,
}
impl<'a> DuckDuckGoSearchStream<'a> {
pub fn new(ddg: &'a DuckDuckGo) -> Self {
Self { ddg }
}
}
#[async_trait]
impl Stream for DuckDuckGoSearchStream<'_> {
type Item = SearchResult;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
Poll::Pending
}
}
impl DuckDuckGo {
pub fn get_stream<'a>(&'a self) -> impl Stream<Item = SearchResult> + 'a {
DuckDuckGoSearchStream { ddg: self }
impl EngineBase for DuckDuckGo {
fn add_result(&mut self, result: SearchResult) {
self.results.push(result);
}
fn slice_remaining_block(&mut self, start_position: &usize) {
let previous_block_bytes = self.previous_block.as_bytes().to_vec();
let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
let remaining_text = String::from_utf8(remaining_bytes).unwrap();
self.previous_block.clear();
self.previous_block.push_str(&remaining_text);
}
pub fn new() -> Self {
Self {
callback: Box::new(|_: SearchResult| {}),
results_started: false,
previous_block: String::new(),
results: vec![],
completed: false,
}
}
pub fn set_callback(&mut self, callback: CallbackType) {
self.callback = callback;
}
pub async fn search(&mut self, query: &str) {
let client = reqwest::Client::new();
let mut stream = client
.post("https://html.duckduckgo.com/html/")
.header("Content-Type", "application/x-www-form-urlencoded")
.body(format!("q={}", query))
.send()
.await
.unwrap()
.bytes_stream();
dbg!("before");
while let Some(item) = stream.next().await {
let packet = item.unwrap();
dbg!(packet.len());
if let Some(result) = self.parse_packet(packet.iter()) {
self.results.push(result);
}
}
dbg!("after");
while let Some(result) = self.parse_next() {
self.results.push(result);
}
dbg!("competed");
self.completed = true;
}
pub fn push_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) {
let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
let raw_text = String::from_utf8_lossy(&bytes);
let text = STRIP.replace_all(&raw_text, " ");
if self.results_started {
self.previous_block.push_str(&text);
} else {
self.results_started = RESULTS_START.is_match(&text);
}
}
/// Push packet to internal block and return next available search result, if available
fn parse_packet<'a>(
&mut self,
packet: impl Iterator<Item = &'a u8>,
) -> Option<SearchResult> {
self.push_packet(packet);
self.parse_next()
}
/// Get next search result, if available, from internal stored block
fn parse_next<'a>(&mut self) -> Option<SearchResult> {
if self.results_started {
match SINGLE_RESULT.captures(&self.previous_block.to_owned()) {
@ -208,5 +111,188 @@ pub mod duckduckgo {
None
}
fn push_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) {
let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
let raw_text = String::from_utf8_lossy(&bytes);
let text = STRIP.replace_all(&raw_text, " ");
if self.results_started {
self.previous_block.push_str(&text);
} else {
self.results_started = RESULTS_START.is_match(&text);
}
}
// Searches DuckDuckGo for the given query
// Uses rustls as reqwest does not support accessing the raw packets
async fn search(&mut self, query: &str) {
let root_store =
RootCertStore::from_iter(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
let mut config = rustls::ClientConfig::builder()
.with_root_certificates(root_store)
.with_no_client_auth();
// Allow using SSLKEYLOGFILE.
config.key_log = Arc::new(rustls::KeyLogFile::new());
let now = std::time::Instant::now();
let server_name = "html.duckduckgo.com".try_into().unwrap();
let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
let mut sock = TcpStream::connect("html.duckduckgo.com:443").unwrap();
let mut tls = rustls::Stream::new(&mut conn, &mut sock);
tls.write_all(
concat!(
"POST /html/ HTTP/1.1\r\n",
"Host: html.duckduckgo.com\r\n",
"Connection: cloSe\r\n",
"Accept-Encoding: identity\r\n",
"Content-Length: 6\r\n",
// form data
"Content-Type: application/x-www-form-urlencoded\r\n",
"\r\n",
"q=test",
)
.as_bytes(),
)
.unwrap();
let mut plaintext = Vec::new();
dbg!(now.elapsed());
loop {
let mut buf = [0; 65535];
tls.conn.complete_io(tls.sock);
let n = tls.conn.reader().read(&mut buf);
if n.is_ok() {
dbg!(&n);
let n = n.unwrap();
if n == 0 {
break;
}
println!("{}", "=================");
dbg!(now.elapsed());
// println!("{}", String::from_utf8_lossy(&buf));
plaintext.extend_from_slice(&buf);
}
}
// let root_store = RootCertStore {
// roots: webpki_roots::TLS_SERVER_ROOTS.into(),
// };
//
// let mut config = rustls::ClientConfig::builder()
// .with_root_certificates(root_store)
// .with_no_client_auth();
//
// // Allow using SSLKEYLOGFILE.
// config.key_log = Arc::new(rustls::KeyLogFile::new());
//
// let server_name = "html.duckduckgo.com".try_into().unwrap();
// let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
//
// let mut sock = TcpStream::connect("html.duckduckgo.com:443").unwrap();
// let mut tls = rustls::Stream::new(&mut conn, &mut sock);
// tls.write_all(
// concat!(
// "POST /html/ HTTP/1.1\r\n",
// "Host: html.duckduckgo.com\r\n",
// "Connection: close\r\n",
// "Accept-Encoding: identity\r\n",
// "Content-Length: 6\r\n",
// // form data
// "Content-Type: application/x-www-form-urlencoded\r\n",
// "\r\n",
// "q=test",
// )
// .as_bytes(),
// )
// .unwrap();
// let ciphersuite = tls.conn.negotiated_cipher_suite().unwrap();
// writeln!(
// &mut std::io::stderr(),
// "Current ciphersuite: {:?}",
// ciphersuite.suite()
// )
// .unwrap();
//
// // Iterate over the stream to read the response.
// loop {
// let mut buf = [0u8; 1024];
// let n = tls.read(&mut buf).unwrap();
// if n == 0 {
// break;
// }
//
// if let Some(result) = self.parse_packet(buf.iter()) {
// self.add_result(result);
//
// // Wait one second
// std::thread::sleep(std::time::Duration::from_millis(100));
// }
// }
//
// while let Some(result) = self.parse_next() {
// self.add_result(result);
// }
//
// dbg!("done with searching");
// let client = reqwest::Client::new();
//
// let now = std::time::Instant::now();
//
// let mut stream = client
// .post("https://html.duckduckgo.com/html/")
// .header("Content-Type", "application/x-www-form-urlencoded")
// .body(format!("q={}", query))
// .send()
// .await
// .unwrap()
// .bytes_stream();
//
// let diff = now.elapsed();
// dbg!(diff);
//
// while let Some(item) = stream.next().await {
// let packet = item.unwrap();
//
// if let Some(result) = self.parse_packet(packet.iter()) {
// self.add_result(result);
// }
// }
//
// while let Some(result) = self.parse_next() {
// self.add_result(result);
// }
//
// let second_diff = now.elapsed();
// dbg!(second_diff);
}
}
impl DuckDuckGo {
fn slice_remaining_block(&mut self, start_position: &usize) {
let previous_block_bytes = self.previous_block.as_bytes().to_vec();
let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
let remaining_text = String::from_utf8(remaining_bytes).unwrap();
self.previous_block.clear();
self.previous_block.push_str(&remaining_text);
}
pub fn new() -> Self {
Self {
callback: Box::new(|_: SearchResult| {}),
results_started: false,
previous_block: String::new(),
results: vec![],
completed: false,
}
}
pub fn set_callback(&mut self, callback: CallbackType) {
self.callback = callback;
}
}
}

View File

@ -1,5 +1,14 @@
pub mod engine_base {
use async_trait::async_trait;
use bytes::Bytes;
use futures::{Stream, StreamExt};
use lazy_static::lazy_static;
use regex::Regex;
use reqwest::Error;
lazy_static! {
static ref STRIP: Regex = Regex::new(r"\s+").unwrap();
}
#[derive(Clone, Copy, Debug, Hash)]
pub enum SearchEngine {
@ -14,9 +23,39 @@ pub mod engine_base {
pub engine: SearchEngine,
}
#[async_trait]
pub trait EngineBase {
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>);
fn add_result(&mut self, result: SearchResult);
fn parse_next<'a>(&mut self) -> Option<SearchResult>;
fn push_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>);
// fn push_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) {
// let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
// let raw_text = String::from_utf8_lossy(&bytes);
// let text = STRIP.replace_all(&raw_text, " ");
//
// if self.results_started {
// self.previous_block.push_str(&text);
// } else {
// self.results_started = RESULTS_START.is_match(&text);
// }
// }
/// Push packet to internal block and return next available search result, if available
fn parse_packet<'a>(
&mut self,
packet: impl Iterator<Item = &'a u8>,
) -> Option<SearchResult> {
self.push_packet(packet);
self.parse_next()
}
async fn search(&mut self, query: &str);
}
#[derive(Clone, Debug, Hash, Default)]
pub struct ResultsCollector {
results: Vec<SearchResult>,
}
}

View File

@ -19,21 +19,28 @@
// }
// Found no other way to make this work
#![feature(async_closure)]
use crate::engines::engine_base::engine_base::EngineBase;
use std::io::{Read, Write};
use std::net::TcpStream;
use std::str;
use std::sync::Arc;
use std::time::Instant;
use engines::brave::brave::Brave;
use futures::lock::Mutex;
use lazy_static::lazy_static;
use rocket::response::{content::RawHtml, stream::TextStream};
use engines::duckduckgo::duckduckgo::DuckDuckGo;
use rocket::response::{
content::{RawCss, RawHtml},
stream::TextStream,
};
use rustls::RootCertStore;
use crate::static_files::static_files::read_file_contents;
pub mod client;
pub mod engines;
pub mod static_files;
pub mod tsclient;
pub mod utils;
#[macro_use]
@ -63,21 +70,178 @@ fn search_get() -> &'static str {
}
#[get("/tailwind.css")]
fn get_tailwindcss() -> &'static str {
&TAILWIND_CSS
fn get_tailwindcss() -> RawCss<&'static str> {
RawCss(&TAILWIND_CSS)
}
#[get("/slow")]
async fn slow() -> &'static str {
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
"Slow"
}
#[get("/slowresponse")]
async fn slowresponse() -> TextStream![String] {
TextStream! {
yield "First".to_owned();
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
yield "second".to_owned();
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
yield "third".to_owned();
}
}
#[get("/searchquery?<query>")]
async fn hello<'a>(query: &str) -> RawHtml<TextStream![String]> {
let query_box = Box::new(query.to_string());
let now = Arc::new(Box::new(Instant::now()));
let ddg_ref = Arc::new(Mutex::new(DuckDuckGo::new()));
let completed_ref = Arc::new(Mutex::new(false));
let completed_ref_writer = completed_ref.clone();
let ddg_ref = Arc::new(Mutex::new(Brave::new()));
let ddg_ref_writer = ddg_ref.clone();
tokio::spawn(async move {
let mut ddg = ddg_ref_writer.lock().await;
let now_ref = now.clone();
ddg.search(&query_box).await;
tokio::spawn(async move {
// let root_store = RootCertStore {
// roots: webpki_roots::TLS_SERVER_ROOTS.into(),
// };
//
// let mut config = rustls::ClientConfig::builder()
// .with_root_certificates(root_store)
// .with_no_client_auth();
//
// // Allow using SSLKEYLOGFILE.
// config.key_log = Arc::new(rustls::KeyLogFile::new());
//
// let now = Instant::now();
// let server_name = "html.duckduckgo.com".try_into().unwrap();
// let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
//
// let mut sock = TcpStream::connect("html.duckduckgo.com:443".to_socket_addrs()).unwrap();
// let mut tls = rustls::Stream::new(&mut conn, &mut sock);
// tls.write_all(
// concat!(
// "POST /html/ HTTP/1.1\r\n",
// "Host: html.duckduckgo.com\r\n",
// "Connection: close\r\n",
// "Accept-Encoding: identity\r\n",
// "Content-Length: 6\r\n",
// // form data
// "Content-Type: application/x-www-form-urlencoded\r\n",
// "\r\n",
// "q=test",
// )
// .as_bytes(),
// )
// .unwrap();
// dbg!("Connected to DuckDuckGo");
// dbg!(now.elapsed());
//
// // Iterate over the stream to read the response in real time
//
// loop {
// if conn.wants_read() {
// conn.read_tls(&mut sock).unwrap();
// conn.process_new_packets().unwrap();
//
// let mut plaintext = Vec::new();
// conn.reader().read_to_end(&mut plaintext).unwrap();
// }
//
// if conn.wants_write() {
// conn.write_tls(&mut sock).unwrap();
// }
// sock.wa
// }
// loop {
// dbg!(now.elapsed());
// let mut buf = [0u8; 1024];
// let n = tls.read(&mut buf).unwrap();
// if n == 0 {
// break;
// }
//
// dbg!(now.elapsed());
//
// let mut ddg = ddg_ref_writer.lock().await;
// if let Some(result) = ddg.parse_packet(buf.iter()) {
// ddg.add_result(result);
// }
//
// // Release
// drop(ddg);
// tokio::task::yield_now().await;
// }
// dbg!("done with content");
// dbg!(now.elapsed());
//
// let mut ddg = ddg_ref_writer.lock().await;
// while let Some(result) = ddg.parse_next() {
// ddg.add_result(result);
// }
let root_store = RootCertStore::from_iter(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
let mut config = rustls::ClientConfig::builder()
.with_root_certificates(root_store)
.with_no_client_auth();
// Allow using SSLKEYLOGFILE.
config.key_log = Arc::new(rustls::KeyLogFile::new());
let server_name = "search.brave.com".try_into().unwrap();
let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
let mut sock = TcpStream::connect("search.brave.com:443").unwrap();
let mut tls = rustls::Stream::new(&mut conn, &mut sock);
tls.write_all(
concat!(
"GET /search?q=test&show_local=0&source=unlocalise HTTP/1.1\r\n",
"Host: search.brave.com\r\n",
"Connection: close\r\n",
"Accept-Encoding: identity\r\n",
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.3\r\n",
"\r\n",
)
.as_bytes(),
)
.unwrap();
loop {
let mut buf = [0; 65535];
tls.conn.complete_io(tls.sock);
let n = tls.conn.reader().read(&mut buf);
// dbg!(&n);
if n.is_ok() {
let n = n.unwrap();
if n == 0 {
break;
}
// println!("{}", String::from_utf8_lossy(&buf));
let mut brave = ddg_ref_writer.lock().await;
if let Some(result) = brave.parse_packet(buf.iter()) {
println!("Brave: {}", now_ref.elapsed().as_millis());
brave.add_result(result);
drop(brave);
tokio::task::yield_now().await;
}
}
}
let mut completed = completed_ref_writer.lock().await;
*completed = true;
});
let mut current_index = 0;
@ -91,20 +255,27 @@ async fn hello<'a>(query: &str) -> RawHtml<TextStream![String]> {
let len = ddg.results.len();
if len == 0 {
drop(ddg);
tokio::task::yield_now().await;
continue
}
if ddg.completed && current_index == len - 1 {
let completed = completed_ref.lock().await;
if *completed && current_index == len - 1 {
break
}
drop(completed);
for ii in (current_index + 1)..len {
let result = ddg.results.get(ii).unwrap();
println!("Yield: {}", now.elapsed().as_millis());
let text = format!("<li><h1>{}</h1><p>{}</p></li>", &result.title, &result.description);
yield text.to_string();
}
drop(ddg);
tokio::task::yield_now().await;
// [1] -> 0
// 1 -> [1]
@ -122,4 +293,6 @@ async fn rocket() -> _ {
.mount("/", routes![hello])
.mount("/", routes![search_get])
.mount("/", routes![get_tailwindcss])
.mount("/", routes![slow])
.mount("/", routes![slowresponse])
}

174
src/tsclient.rs Normal file
View File

@ -0,0 +1,174 @@
pub mod tsclient {
use std::io::{self, BufReader, Read, Write};
use std::net::ToSocketAddrs;
use std::sync::Arc;
use std::time::Instant;
use std::{fs, process, str};
use mio::net::TcpStream;
use rustls::pki_types::{CertificateDer, PrivateKeyDer, ServerName};
use rustls::RootCertStore;
const CLIENT: mio::Token = mio::Token(0);
/// This encapsulates the TCP-level connection, some connection
/// state, and the underlying TLS-level session.
pub struct TlsClient {
socket: TcpStream,
closing: bool,
clean_closure: bool,
tls_conn: rustls::ClientConnection,
now: Instant,
}
impl TlsClient {
pub fn new(
sock: TcpStream,
server_name: ServerName<'static>,
cfg: Arc<rustls::ClientConfig>,
) -> Self {
Self {
socket: sock,
closing: false,
clean_closure: false,
tls_conn: rustls::ClientConnection::new(cfg, server_name).unwrap(),
now: Instant::now(),
}
}
/// Handles events sent to the TlsClient by mio::Poll
pub fn ready(&mut self, ev: &mio::event::Event) {
assert_eq!(ev.token(), CLIENT);
if ev.is_readable() {
self.do_read();
}
if ev.is_writable() {
self.do_write();
}
if self.is_closed() {
println!("Connection closed");
}
}
fn read_source_to_end(&mut self, rd: &mut dyn io::Read) -> io::Result<usize> {
let mut buf = Vec::new();
let len = rd.read_to_end(&mut buf)?;
self.tls_conn.writer().write_all(&buf).unwrap();
Ok(len)
}
/// We're ready to do a read.
fn do_read(&mut self) {
// Read TLS data. This fails if the underlying TCP connection
// is broken.
match self.tls_conn.read_tls(&mut self.socket) {
Err(error) => {
if error.kind() == io::ErrorKind::WouldBlock {
return;
}
println!("TLS read error: {:?}", error);
self.closing = true;
return;
}
// If we're ready but there's no data: EOF.
Ok(0) => {
self.closing = true;
self.clean_closure = true;
return;
}
Ok(_) => {}
};
// Reading some TLS data might have yielded new TLS
// messages to process. Errors from this indicate
// TLS protocol problems and are fatal.
let io_state = match self.tls_conn.process_new_packets() {
Ok(io_state) => io_state,
Err(err) => {
println!("TLS error: {:?}", err);
self.closing = true;
return;
}
};
// Having read some TLS data, and processed any new messages,
// we might have new plaintext as a result.
//
// Read it and then write it to stdout.
if io_state.plaintext_bytes_to_read() > 0 {
let mut plaintext = vec![0u8; io_state.plaintext_bytes_to_read()];
self.tls_conn.reader().read_exact(&mut plaintext).unwrap();
// dbg!(self.now.elapsed());
println!("{}", "=======");
}
// If that fails, the peer might have started a clean TLS-level
// session closure.
if io_state.peer_has_closed() {
self.clean_closure = true;
self.closing = true;
}
}
fn do_write(&mut self) {
self.tls_conn.write_tls(&mut self.socket).unwrap();
}
/// Registers self as a 'listener' in mio::Registry
pub fn register(&mut self, registry: &mio::Registry) {
let interest = self.event_set();
registry
.register(&mut self.socket, CLIENT, interest)
.unwrap();
}
/// Reregisters self as a 'listener' in mio::Registry.
pub fn reregister(&mut self, registry: &mio::Registry) {
let interest = self.event_set();
registry
.reregister(&mut self.socket, CLIENT, interest)
.unwrap();
}
/// Use wants_read/wants_write to register for different mio-level
/// IO readiness events.
fn event_set(&self) -> mio::Interest {
let rd = self.tls_conn.wants_read();
let wr = self.tls_conn.wants_write();
if rd && wr {
mio::Interest::READABLE | mio::Interest::WRITABLE
} else if wr {
mio::Interest::WRITABLE
} else {
mio::Interest::READABLE
}
}
pub fn is_closed(&self) -> bool {
self.closing
}
}
impl io::Write for TlsClient {
fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
self.tls_conn.writer().write(bytes)
}
fn flush(&mut self) -> io::Result<()> {
self.tls_conn.writer().flush()
}
}
impl io::Read for TlsClient {
fn read(&mut self, bytes: &mut [u8]) -> io::Result<usize> {
self.tls_conn.reader().read(bytes)
}
}
}