diff --git a/.gitignore b/.gitignore
index 158e489..42fcfb8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
/target
build
+
+./tailwindcss/node_modules/*
diff --git a/Cargo.lock b/Cargo.lock
index 73147ed..ac5f3c2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -80,6 +80,32 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+[[package]]
+name = "aws-lc-rs"
+version = "1.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df33e4a55b03f8780ba55041bc7be91a2a8ec8c03517b0379d2d6c96d2c30d95"
+dependencies = [
+ "aws-lc-sys",
+ "mirai-annotations",
+ "paste",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-sys"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5f269b176dc4aeb593910fa56ed6f956cde19542e496bb0bfc1ad9a6ce18815"
+dependencies = [
+ "bindgen",
+ "cmake",
+ "dunce",
+ "fs_extra",
+ "libc",
+ "paste",
+]
+
[[package]]
name = "backtrace"
version = "0.3.69"
@@ -107,6 +133,29 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "383d29d513d8764dcdc42ea295d979eb99c3c9f00607b3692cf68a431f7dca72"
+[[package]]
+name = "bindgen"
+version = "0.69.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
+dependencies = [
+ "bitflags 2.4.2",
+ "cexpr",
+ "clang-sys",
+ "itertools",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "syn",
+ "which",
+]
+
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -146,12 +195,41 @@ dependencies = [
"libc",
]
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+[[package]]
+name = "clang-sys"
+version = "1.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "cmake"
+version = "0.1.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
+dependencies = [
+ "cc",
+]
+
[[package]]
name = "cookie"
version = "0.18.0"
@@ -221,6 +299,12 @@ dependencies = [
"syn",
]
+[[package]]
+name = "dunce"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b"
+
[[package]]
name = "either"
version = "1.9.0"
@@ -302,6 +386,12 @@ dependencies = [
"percent-encoding",
]
+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
[[package]]
name = "futures"
version = "0.3.30"
@@ -458,6 +548,15 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
+[[package]]
+name = "home"
+version = "0.5.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
+dependencies = [
+ "windows-sys 0.52.0",
+]
+
[[package]]
name = "http"
version = "0.2.11"
@@ -573,6 +672,15 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "itertools"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+dependencies = [
+ "either",
+]
+
[[package]]
name = "itoa"
version = "1.0.10"
@@ -617,12 +725,28 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
[[package]]
name = "libc"
version = "0.2.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
+[[package]]
+name = "libloading"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161"
+dependencies = [
+ "cfg-if",
+ "windows-sys 0.48.0",
+]
+
[[package]]
name = "linux-raw-sys"
version = "0.4.13"
@@ -681,6 +805,12 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
[[package]]
name = "miniz_oxide"
version = "0.7.1"
@@ -697,10 +827,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09"
dependencies = [
"libc",
+ "log",
"wasi",
"windows-sys 0.48.0",
]
+[[package]]
+name = "mirai-annotations"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1"
+
[[package]]
name = "multer"
version = "2.1.0"
@@ -739,6 +876,16 @@ dependencies = [
"tempfile",
]
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@@ -847,6 +994,12 @@ dependencies = [
"windows-targets 0.48.5",
]
+[[package]]
+name = "paste"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
+
[[package]]
name = "pear"
version = "0.2.8"
@@ -906,6 +1059,16 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+[[package]]
+name = "prettyplease"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
+dependencies = [
+ "proc-macro2",
+ "syn",
+]
+
[[package]]
name = "proc-macro2"
version = "1.0.78"
@@ -1181,6 +1344,12 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
[[package]]
name = "rustix"
version = "0.38.30"
@@ -1196,12 +1365,11 @@ dependencies = [
[[package]]
name = "rustls"
-version = "0.22.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41"
+version = "0.23.0-alpha.0"
dependencies = [
+ "aws-lc-rs",
"log",
- "ring",
+ "once_cell",
"rustls-pki-types",
"rustls-webpki",
"subtle",
@@ -1209,17 +1377,28 @@ dependencies = [
]
[[package]]
-name = "rustls-pki-types"
-version = "1.1.0"
+name = "rustls-pemfile"
+version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a"
+checksum = "3c333bb734fcdedcea57de1602543590f545f127dc8b533324318fd492c5c70b"
+dependencies = [
+ "base64",
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "048a63e5b3ac996d78d402940b5fa47973d2d080c6c6fffa1d0f19c4445310b7"
[[package]]
name = "rustls-webpki"
-version = "0.102.1"
+version = "0.102.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef4ca26037c909dedb327b48c3327d0ba91d3dd3c4e05dad328f210ffb68e95b"
+checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610"
dependencies = [
+ "aws-lc-rs",
"ring",
"rustls-pki-types",
"untrusted",
@@ -1342,6 +1521,12 @@ dependencies = [
"lazy_static",
]
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
@@ -1443,13 +1628,16 @@ name = "tcp_test"
version = "0.1.0"
dependencies = [
"async-trait",
+ "bytes",
"futures",
"lazy-regex",
"lazy_static",
+ "mio",
"regex",
"reqwest",
"rocket",
"rustls",
+ "rustls-pemfile",
"tokio",
"urlencoding",
"webpki-roots",
@@ -1894,6 +2082,18 @@ dependencies = [
"rustls-pki-types",
]
+[[package]]
+name = "which"
+version = "4.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
+dependencies = [
+ "either",
+ "home",
+ "once_cell",
+ "rustix",
+]
+
[[package]]
name = "winapi"
version = "0.3.9"
diff --git a/Cargo.toml b/Cargo.toml
index 129dd83..a6f4370 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,13 +7,16 @@ edition = "2021"
[dependencies]
async-trait = "0.1.77"
+bytes = "1.5.0"
futures = "0.3.30"
lazy-regex = "3.1.0"
lazy_static = "1.4.0"
+mio = { version = "0.8.10", features = ["net", "os-poll", ] }
regex = "1.10.3"
reqwest = { version = "0.11.23", features = ["stream"] }
rocket = "0.5.0"
-rustls = "0.22.2"
+rustls = { path = "../rustls/rustls", features = ["logging"] }
+rustls-pemfile = "2"
tokio = {version = "1.35.1", features = ["full"]}
urlencoding = "2.1.3"
webpki-roots = "0.26.0"
diff --git a/src/engines.rs b/src/engines.rs
index d55bb15..9311cee 100644
--- a/src/engines.rs
+++ b/src/engines.rs
@@ -1,2 +1,3 @@
+pub mod brave;
pub mod duckduckgo;
pub mod engine_base;
diff --git a/src/engines/brave.rs b/src/engines/brave.rs
new file mode 100644
index 0000000..85128a8
--- /dev/null
+++ b/src/engines/brave.rs
@@ -0,0 +1,109 @@
+// Search engine parser for Brave Search
+// This uses the clearnet, unlocalized version of the search engine.
+pub mod brave {
+ use lazy_static::lazy_static;
+ use regex::Regex;
+ use urlencoding::decode;
+
+ use crate::{
+ engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult},
+ utils::utils::decode_html_text,
+ };
+
+ lazy_static! {
+ static ref RESULTS_START: Regex = Regex::new(r#"
(?P.+?))|(class="h.? svelte.+?
.*?))"#).unwrap();
+ static ref STRIP: Regex = Regex::new(r"\s+").unwrap();
+ static ref STRIP_HTML_TAGS: Regex = Regex::new(r#"<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>"#).unwrap();
+ }
+
+ pub struct Brave {
+ pub completed: bool,
+ results_started: bool,
+ pub previous_block: String,
+ pub results: Vec,
+ }
+
+ impl Brave {
+ fn slice_remaining_block(&mut self, start_position: &usize) {
+ let previous_block_bytes = self.previous_block.as_bytes().to_vec();
+ let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
+ let remaining_text = String::from_utf8(remaining_bytes).unwrap();
+
+ self.previous_block.clear();
+ self.previous_block.push_str(&remaining_text);
+ }
+
+ pub fn new() -> Self {
+ Self {
+ results_started: false,
+ previous_block: String::new(),
+ results: vec![],
+ completed: false,
+ }
+ }
+ }
+
+ impl EngineBase for Brave {
+ fn add_result(&mut self, result: crate::engines::engine_base::engine_base::SearchResult) {
+ self.results.push(result);
+ }
+
+ fn parse_next<'a>(&mut self) -> Option {
+ if self.results_started {
+ match SINGLE_RESULT.captures(&self.previous_block.to_owned()) {
+ Some(captures) => {
+ let title = decode(
+ captures
+ .name("title")
+ .unwrap_or_else(|| captures.name("title2").unwrap())
+ .as_str(),
+ )
+ .unwrap()
+ .into_owned();
+ let description_raw =
+ decode_html_text(captures.name("description").unwrap().as_str())
+ .unwrap();
+ let description = STRIP_HTML_TAGS
+ .replace_all(&description_raw, "")
+ .into_owned();
+ let url = decode(captures.name("url").unwrap().as_str())
+ .unwrap()
+ .into_owned();
+
+ let result = SearchResult {
+ title,
+ description,
+ url,
+ engine: SearchEngine::DuckDuckGo,
+ };
+
+ let end_position = captures.name("end").unwrap().end();
+ self.slice_remaining_block(&end_position);
+
+ return Some(result);
+ }
+ None => {}
+ }
+ }
+
+ None
+ }
+
+ fn push_packet<'a>(&mut self, packet: impl Iterator- ) {
+ let bytes: Vec
= packet.map(|bit| *bit).collect();
+ let raw_text = String::from_utf8_lossy(&bytes);
+ let text = STRIP.replace_all(&raw_text, " ");
+
+ if self.results_started {
+ self.previous_block.push_str(&text);
+ } else {
+ self.results_started = RESULTS_START.is_match(&text);
+ }
+ }
+
+ async fn search(&mut self, query: &str) {
+ todo!()
+ }
+ }
+}
diff --git a/src/engines/brave_example.html b/src/engines/brave_example.html
new file mode 100644
index 0000000..777250e
--- /dev/null
+++ b/src/engines/brave_example.html
@@ -0,0 +1,110 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ test - Brave Search
+
+
+
+
+
+
diff --git a/src/engines/brave_example.prettified.html b/src/engines/brave_example.prettified.html
new file mode 100644
index 0000000..550d2ca
--- /dev/null
+++ b/src/engines/brave_example.prettified.html
@@ -0,0 +1,1858 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ test - Brave Search
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index 40da1c4..77cbad4 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -1,18 +1,17 @@
// Search engine parser for DuckDuckGo
pub mod duckduckgo {
use std::{
- pin::{pin, Pin},
- task::{Context, Poll},
+ io::{Read, Write},
+ net::TcpStream,
+ sync::Arc,
};
- use async_trait::async_trait;
- use futures::{AsyncRead, FutureExt, Stream, StreamExt};
use lazy_static::lazy_static;
use regex::Regex;
+ use rustls::RootCertStore;
use urlencoding::decode;
use crate::{
- client::client::{Client, PACKET_SIZE},
engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult},
utils::utils::decode_html_text,
};
@@ -72,107 +71,11 @@ pub mod duckduckgo {
// }
// }
- pub struct DuckDuckGoSearchStream<'a> {
- pub ddg: &'a DuckDuckGo,
- }
-
- impl<'a> DuckDuckGoSearchStream<'a> {
- pub fn new(ddg: &'a DuckDuckGo) -> Self {
- Self { ddg }
- }
- }
-
- #[async_trait]
- impl Stream for DuckDuckGoSearchStream<'_> {
- type Item = SearchResult;
-
- fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> {
- Poll::Pending
- }
- }
-
- impl DuckDuckGo {
- pub fn get_stream<'a>(&'a self) -> impl Stream- + 'a {
- DuckDuckGoSearchStream { ddg: self }
+ impl EngineBase for DuckDuckGo {
+ fn add_result(&mut self, result: SearchResult) {
+ self.results.push(result);
}
- fn slice_remaining_block(&mut self, start_position: &usize) {
- let previous_block_bytes = self.previous_block.as_bytes().to_vec();
- let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
- let remaining_text = String::from_utf8(remaining_bytes).unwrap();
-
- self.previous_block.clear();
- self.previous_block.push_str(&remaining_text);
- }
-
- pub fn new() -> Self {
- Self {
- callback: Box::new(|_: SearchResult| {}),
- results_started: false,
- previous_block: String::new(),
- results: vec![],
- completed: false,
- }
- }
-
- pub fn set_callback(&mut self, callback: CallbackType) {
- self.callback = callback;
- }
-
- pub async fn search(&mut self, query: &str) {
- let client = reqwest::Client::new();
-
- let mut stream = client
- .post("https://html.duckduckgo.com/html/")
- .header("Content-Type", "application/x-www-form-urlencoded")
- .body(format!("q={}", query))
- .send()
- .await
- .unwrap()
- .bytes_stream();
-
- dbg!("before");
- while let Some(item) = stream.next().await {
- let packet = item.unwrap();
- dbg!(packet.len());
-
- if let Some(result) = self.parse_packet(packet.iter()) {
- self.results.push(result);
- }
- }
- dbg!("after");
-
- while let Some(result) = self.parse_next() {
- self.results.push(result);
- }
- dbg!("competed");
-
- self.completed = true;
- }
-
- pub fn push_packet<'a>(&mut self, packet: impl Iterator
- ) {
- let bytes: Vec
= packet.map(|bit| *bit).collect();
- let raw_text = String::from_utf8_lossy(&bytes);
- let text = STRIP.replace_all(&raw_text, " ");
-
- if self.results_started {
- self.previous_block.push_str(&text);
- } else {
- self.results_started = RESULTS_START.is_match(&text);
- }
- }
-
- /// Push packet to internal block and return next available search result, if available
- fn parse_packet<'a>(
- &mut self,
- packet: impl Iterator- ,
- ) -> Option
{
- self.push_packet(packet);
-
- self.parse_next()
- }
-
- /// Get next search result, if available, from internal stored block
fn parse_next<'a>(&mut self) -> Option {
if self.results_started {
match SINGLE_RESULT.captures(&self.previous_block.to_owned()) {
@@ -208,5 +111,188 @@ pub mod duckduckgo {
None
}
+
+ fn push_packet<'a>(&mut self, packet: impl Iterator- ) {
+ let bytes: Vec
= packet.map(|bit| *bit).collect();
+ let raw_text = String::from_utf8_lossy(&bytes);
+ let text = STRIP.replace_all(&raw_text, " ");
+
+ if self.results_started {
+ self.previous_block.push_str(&text);
+ } else {
+ self.results_started = RESULTS_START.is_match(&text);
+ }
+ }
+
+ // Searches DuckDuckGo for the given query
+ // Uses rustls as reqwest does not support accessing the raw packets
+ async fn search(&mut self, query: &str) {
+ let root_store =
+ RootCertStore::from_iter(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
+ let mut config = rustls::ClientConfig::builder()
+ .with_root_certificates(root_store)
+ .with_no_client_auth();
+
+ // Allow using SSLKEYLOGFILE.
+ config.key_log = Arc::new(rustls::KeyLogFile::new());
+
+ let now = std::time::Instant::now();
+ let server_name = "html.duckduckgo.com".try_into().unwrap();
+ let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
+ let mut sock = TcpStream::connect("html.duckduckgo.com:443").unwrap();
+ let mut tls = rustls::Stream::new(&mut conn, &mut sock);
+ tls.write_all(
+ concat!(
+ "POST /html/ HTTP/1.1\r\n",
+ "Host: html.duckduckgo.com\r\n",
+ "Connection: cloSe\r\n",
+ "Accept-Encoding: identity\r\n",
+ "Content-Length: 6\r\n",
+ // form data
+ "Content-Type: application/x-www-form-urlencoded\r\n",
+ "\r\n",
+ "q=test",
+ )
+ .as_bytes(),
+ )
+ .unwrap();
+ let mut plaintext = Vec::new();
+ dbg!(now.elapsed());
+
+ loop {
+ let mut buf = [0; 65535];
+ tls.conn.complete_io(tls.sock);
+ let n = tls.conn.reader().read(&mut buf);
+
+ if n.is_ok() {
+ dbg!(&n);
+ let n = n.unwrap();
+ if n == 0 {
+ break;
+ }
+ println!("{}", "=================");
+ dbg!(now.elapsed());
+ // println!("{}", String::from_utf8_lossy(&buf));
+ plaintext.extend_from_slice(&buf);
+ }
+ }
+
+ // let root_store = RootCertStore {
+ // roots: webpki_roots::TLS_SERVER_ROOTS.into(),
+ // };
+ //
+ // let mut config = rustls::ClientConfig::builder()
+ // .with_root_certificates(root_store)
+ // .with_no_client_auth();
+ //
+ // // Allow using SSLKEYLOGFILE.
+ // config.key_log = Arc::new(rustls::KeyLogFile::new());
+ //
+ // let server_name = "html.duckduckgo.com".try_into().unwrap();
+ // let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
+ //
+ // let mut sock = TcpStream::connect("html.duckduckgo.com:443").unwrap();
+ // let mut tls = rustls::Stream::new(&mut conn, &mut sock);
+ // tls.write_all(
+ // concat!(
+ // "POST /html/ HTTP/1.1\r\n",
+ // "Host: html.duckduckgo.com\r\n",
+ // "Connection: close\r\n",
+ // "Accept-Encoding: identity\r\n",
+ // "Content-Length: 6\r\n",
+ // // form data
+ // "Content-Type: application/x-www-form-urlencoded\r\n",
+ // "\r\n",
+ // "q=test",
+ // )
+ // .as_bytes(),
+ // )
+ // .unwrap();
+ // let ciphersuite = tls.conn.negotiated_cipher_suite().unwrap();
+ // writeln!(
+ // &mut std::io::stderr(),
+ // "Current ciphersuite: {:?}",
+ // ciphersuite.suite()
+ // )
+ // .unwrap();
+ //
+ // // Iterate over the stream to read the response.
+ // loop {
+ // let mut buf = [0u8; 1024];
+ // let n = tls.read(&mut buf).unwrap();
+ // if n == 0 {
+ // break;
+ // }
+ //
+ // if let Some(result) = self.parse_packet(buf.iter()) {
+ // self.add_result(result);
+ //
+ // // Wait one second
+ // std::thread::sleep(std::time::Duration::from_millis(100));
+ // }
+ // }
+ //
+ // while let Some(result) = self.parse_next() {
+ // self.add_result(result);
+ // }
+ //
+ // dbg!("done with searching");
+
+ // let client = reqwest::Client::new();
+ //
+ // let now = std::time::Instant::now();
+ //
+ // let mut stream = client
+ // .post("https://html.duckduckgo.com/html/")
+ // .header("Content-Type", "application/x-www-form-urlencoded")
+ // .body(format!("q={}", query))
+ // .send()
+ // .await
+ // .unwrap()
+ // .bytes_stream();
+ //
+ // let diff = now.elapsed();
+ // dbg!(diff);
+ //
+ // while let Some(item) = stream.next().await {
+ // let packet = item.unwrap();
+ //
+ // if let Some(result) = self.parse_packet(packet.iter()) {
+ // self.add_result(result);
+ // }
+ // }
+ //
+ // while let Some(result) = self.parse_next() {
+ // self.add_result(result);
+ // }
+ //
+ // let second_diff = now.elapsed();
+ // dbg!(second_diff);
+ }
+ }
+
+ impl DuckDuckGo {
+ fn slice_remaining_block(&mut self, start_position: &usize) {
+ let previous_block_bytes = self.previous_block.as_bytes().to_vec();
+ let remaining_bytes = previous_block_bytes[*start_position..].to_vec();
+ let remaining_text = String::from_utf8(remaining_bytes).unwrap();
+
+ self.previous_block.clear();
+ self.previous_block.push_str(&remaining_text);
+ }
+
+ pub fn new() -> Self {
+ Self {
+ callback: Box::new(|_: SearchResult| {}),
+ results_started: false,
+ previous_block: String::new(),
+ results: vec![],
+ completed: false,
+ }
+ }
+
+ pub fn set_callback(&mut self, callback: CallbackType) {
+ self.callback = callback;
+ }
}
}
diff --git a/src/engines/engine_base.rs b/src/engines/engine_base.rs
index 5f3a519..3503266 100644
--- a/src/engines/engine_base.rs
+++ b/src/engines/engine_base.rs
@@ -1,5 +1,14 @@
pub mod engine_base {
- use async_trait::async_trait;
+ use bytes::Bytes;
+
+ use futures::{Stream, StreamExt};
+ use lazy_static::lazy_static;
+ use regex::Regex;
+ use reqwest::Error;
+
+ lazy_static! {
+ static ref STRIP: Regex = Regex::new(r"\s+").unwrap();
+ }
#[derive(Clone, Copy, Debug, Hash)]
pub enum SearchEngine {
@@ -14,9 +23,39 @@ pub mod engine_base {
pub engine: SearchEngine,
}
- #[async_trait]
pub trait EngineBase {
- fn parse_packet<'a>(&mut self, packet: impl Iterator- );
+ fn add_result(&mut self, result: SearchResult);
+
+ fn parse_next<'a>(&mut self) -> Option
;
+
+ fn push_packet<'a>(&mut self, packet: impl Iterator- );
+ // fn push_packet<'a>(&mut self, packet: impl Iterator
- ) {
+ // let bytes: Vec
= packet.map(|bit| *bit).collect();
+ // let raw_text = String::from_utf8_lossy(&bytes);
+ // let text = STRIP.replace_all(&raw_text, " ");
+ //
+ // if self.results_started {
+ // self.previous_block.push_str(&text);
+ // } else {
+ // self.results_started = RESULTS_START.is_match(&text);
+ // }
+ // }
+
+ /// Push packet to internal block and return next available search result, if available
+ fn parse_packet<'a>(
+ &mut self,
+ packet: impl Iterator- ,
+ ) -> Option
{
+ self.push_packet(packet);
+
+ self.parse_next()
+ }
+
async fn search(&mut self, query: &str);
}
+
+ #[derive(Clone, Debug, Hash, Default)]
+ pub struct ResultsCollector {
+ results: Vec,
+ }
}
diff --git a/src/main.rs b/src/main.rs
index 0eaeb48..cf0cf3f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -19,21 +19,28 @@
// }
// Found no other way to make this work
-#![feature(async_closure)]
-
+use crate::engines::engine_base::engine_base::EngineBase;
+use std::io::{Read, Write};
+use std::net::TcpStream;
+use std::str;
use std::sync::Arc;
+use std::time::Instant;
+use engines::brave::brave::Brave;
use futures::lock::Mutex;
use lazy_static::lazy_static;
-use rocket::response::{content::RawHtml, stream::TextStream};
-
-use engines::duckduckgo::duckduckgo::DuckDuckGo;
+use rocket::response::{
+ content::{RawCss, RawHtml},
+ stream::TextStream,
+};
+use rustls::RootCertStore;
use crate::static_files::static_files::read_file_contents;
pub mod client;
pub mod engines;
pub mod static_files;
+pub mod tsclient;
pub mod utils;
#[macro_use]
@@ -63,21 +70,178 @@ fn search_get() -> &'static str {
}
#[get("/tailwind.css")]
-fn get_tailwindcss() -> &'static str {
- &TAILWIND_CSS
+fn get_tailwindcss() -> RawCss<&'static str> {
+ RawCss(&TAILWIND_CSS)
+}
+
+#[get("/slow")]
+async fn slow() -> &'static str {
+ tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
+
+ "Slow"
+}
+
+#[get("/slowresponse")]
+async fn slowresponse() -> TextStream![String] {
+ TextStream! {
+ yield "First".to_owned();
+
+ tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
+
+ yield "second".to_owned();
+
+ tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
+
+ yield "third".to_owned();
+ }
}
#[get("/searchquery?")]
async fn hello<'a>(query: &str) -> RawHtml {
let query_box = Box::new(query.to_string());
+ let now = Arc::new(Box::new(Instant::now()));
- let ddg_ref = Arc::new(Mutex::new(DuckDuckGo::new()));
+ let completed_ref = Arc::new(Mutex::new(false));
+ let completed_ref_writer = completed_ref.clone();
+ let ddg_ref = Arc::new(Mutex::new(Brave::new()));
let ddg_ref_writer = ddg_ref.clone();
- tokio::spawn(async move {
- let mut ddg = ddg_ref_writer.lock().await;
+ let now_ref = now.clone();
- ddg.search(&query_box).await;
+ tokio::spawn(async move {
+ // let root_store = RootCertStore {
+ // roots: webpki_roots::TLS_SERVER_ROOTS.into(),
+ // };
+ //
+ // let mut config = rustls::ClientConfig::builder()
+ // .with_root_certificates(root_store)
+ // .with_no_client_auth();
+ //
+ // // Allow using SSLKEYLOGFILE.
+ // config.key_log = Arc::new(rustls::KeyLogFile::new());
+ //
+ // let now = Instant::now();
+ // let server_name = "html.duckduckgo.com".try_into().unwrap();
+ // let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
+ //
+ // let mut sock = TcpStream::connect("html.duckduckgo.com:443".to_socket_addrs()).unwrap();
+ // let mut tls = rustls::Stream::new(&mut conn, &mut sock);
+ // tls.write_all(
+ // concat!(
+ // "POST /html/ HTTP/1.1\r\n",
+ // "Host: html.duckduckgo.com\r\n",
+ // "Connection: close\r\n",
+ // "Accept-Encoding: identity\r\n",
+ // "Content-Length: 6\r\n",
+ // // form data
+ // "Content-Type: application/x-www-form-urlencoded\r\n",
+ // "\r\n",
+ // "q=test",
+ // )
+ // .as_bytes(),
+ // )
+ // .unwrap();
+
+ // dbg!("Connected to DuckDuckGo");
+ // dbg!(now.elapsed());
+ //
+ // // Iterate over the stream to read the response in real time
+ //
+ // loop {
+ // if conn.wants_read() {
+ // conn.read_tls(&mut sock).unwrap();
+ // conn.process_new_packets().unwrap();
+ //
+ // let mut plaintext = Vec::new();
+ // conn.reader().read_to_end(&mut plaintext).unwrap();
+ // }
+ //
+ // if conn.wants_write() {
+ // conn.write_tls(&mut sock).unwrap();
+ // }
+ // sock.wa
+ // }
+
+ // loop {
+ // dbg!(now.elapsed());
+ // let mut buf = [0u8; 1024];
+ // let n = tls.read(&mut buf).unwrap();
+ // if n == 0 {
+ // break;
+ // }
+ //
+ // dbg!(now.elapsed());
+ //
+ // let mut ddg = ddg_ref_writer.lock().await;
+ // if let Some(result) = ddg.parse_packet(buf.iter()) {
+ // ddg.add_result(result);
+ // }
+ //
+ // // Release
+ // drop(ddg);
+ // tokio::task::yield_now().await;
+ // }
+
+ // dbg!("done with content");
+ // dbg!(now.elapsed());
+ //
+ // let mut ddg = ddg_ref_writer.lock().await;
+ // while let Some(result) = ddg.parse_next() {
+ // ddg.add_result(result);
+ // }
+
+ let root_store = RootCertStore::from_iter(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
+ let mut config = rustls::ClientConfig::builder()
+ .with_root_certificates(root_store)
+ .with_no_client_auth();
+
+ // Allow using SSLKEYLOGFILE.
+ config.key_log = Arc::new(rustls::KeyLogFile::new());
+
+ let server_name = "search.brave.com".try_into().unwrap();
+ let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap();
+ let mut sock = TcpStream::connect("search.brave.com:443").unwrap();
+ let mut tls = rustls::Stream::new(&mut conn, &mut sock);
+ tls.write_all(
+ concat!(
+ "GET /search?q=test&show_local=0&source=unlocalise HTTP/1.1\r\n",
+ "Host: search.brave.com\r\n",
+ "Connection: close\r\n",
+ "Accept-Encoding: identity\r\n",
+ "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.3\r\n",
+ "\r\n",
+ )
+ .as_bytes(),
+ )
+ .unwrap();
+
+ loop {
+ let mut buf = [0; 65535];
+ tls.conn.complete_io(tls.sock);
+ let n = tls.conn.reader().read(&mut buf);
+
+ // dbg!(&n);
+
+ if n.is_ok() {
+ let n = n.unwrap();
+ if n == 0 {
+ break;
+ }
+ // println!("{}", String::from_utf8_lossy(&buf));
+ let mut brave = ddg_ref_writer.lock().await;
+
+ if let Some(result) = brave.parse_packet(buf.iter()) {
+ println!("Brave: {}", now_ref.elapsed().as_millis());
+ brave.add_result(result);
+
+ drop(brave);
+ tokio::task::yield_now().await;
+ }
+ }
+ }
+
+ let mut completed = completed_ref_writer.lock().await;
+ *completed = true;
});
let mut current_index = 0;
@@ -91,20 +255,27 @@ async fn hello<'a>(query: &str) -> RawHtml {
let len = ddg.results.len();
if len == 0 {
+ drop(ddg);
+ tokio::task::yield_now().await;
continue
}
- if ddg.completed && current_index == len - 1 {
+ let completed = completed_ref.lock().await;
+ if *completed && current_index == len - 1 {
break
}
+ drop(completed);
for ii in (current_index + 1)..len {
let result = ddg.results.get(ii).unwrap();
+ println!("Yield: {}", now.elapsed().as_millis());
let text = format!("{} {}
", &result.title, &result.description);
yield text.to_string();
}
+ drop(ddg);
+ tokio::task::yield_now().await;
// [1] -> 0
// 1 -> [1]
@@ -122,4 +293,6 @@ async fn rocket() -> _ {
.mount("/", routes![hello])
.mount("/", routes![search_get])
.mount("/", routes![get_tailwindcss])
+ .mount("/", routes![slow])
+ .mount("/", routes![slowresponse])
}
diff --git a/src/tsclient.rs b/src/tsclient.rs
new file mode 100644
index 0000000..87ba9e8
--- /dev/null
+++ b/src/tsclient.rs
@@ -0,0 +1,174 @@
+pub mod tsclient {
+ use std::io::{self, BufReader, Read, Write};
+ use std::net::ToSocketAddrs;
+ use std::sync::Arc;
+ use std::time::Instant;
+ use std::{fs, process, str};
+
+ use mio::net::TcpStream;
+
+ use rustls::pki_types::{CertificateDer, PrivateKeyDer, ServerName};
+ use rustls::RootCertStore;
+
+ const CLIENT: mio::Token = mio::Token(0);
+
+ /// This encapsulates the TCP-level connection, some connection
+ /// state, and the underlying TLS-level session.
+ pub struct TlsClient {
+ socket: TcpStream,
+ closing: bool,
+ clean_closure: bool,
+ tls_conn: rustls::ClientConnection,
+ now: Instant,
+ }
+
+ impl TlsClient {
+ pub fn new(
+ sock: TcpStream,
+ server_name: ServerName<'static>,
+ cfg: Arc,
+ ) -> Self {
+ Self {
+ socket: sock,
+ closing: false,
+ clean_closure: false,
+ tls_conn: rustls::ClientConnection::new(cfg, server_name).unwrap(),
+ now: Instant::now(),
+ }
+ }
+
+ /// Handles events sent to the TlsClient by mio::Poll
+ pub fn ready(&mut self, ev: &mio::event::Event) {
+ assert_eq!(ev.token(), CLIENT);
+
+ if ev.is_readable() {
+ self.do_read();
+ }
+
+ if ev.is_writable() {
+ self.do_write();
+ }
+
+ if self.is_closed() {
+ println!("Connection closed");
+ }
+ }
+
+ fn read_source_to_end(&mut self, rd: &mut dyn io::Read) -> io::Result {
+ let mut buf = Vec::new();
+ let len = rd.read_to_end(&mut buf)?;
+ self.tls_conn.writer().write_all(&buf).unwrap();
+ Ok(len)
+ }
+
+ /// We're ready to do a read.
+ fn do_read(&mut self) {
+ // Read TLS data. This fails if the underlying TCP connection
+ // is broken.
+ match self.tls_conn.read_tls(&mut self.socket) {
+ Err(error) => {
+ if error.kind() == io::ErrorKind::WouldBlock {
+ return;
+ }
+ println!("TLS read error: {:?}", error);
+ self.closing = true;
+ return;
+ }
+
+ // If we're ready but there's no data: EOF.
+ Ok(0) => {
+ self.closing = true;
+ self.clean_closure = true;
+ return;
+ }
+
+ Ok(_) => {}
+ };
+
+ // Reading some TLS data might have yielded new TLS
+ // messages to process. Errors from this indicate
+ // TLS protocol problems and are fatal.
+ let io_state = match self.tls_conn.process_new_packets() {
+ Ok(io_state) => io_state,
+ Err(err) => {
+ println!("TLS error: {:?}", err);
+ self.closing = true;
+ return;
+ }
+ };
+
+ // Having read some TLS data, and processed any new messages,
+ // we might have new plaintext as a result.
+ //
+ // Read it and then write it to stdout.
+ if io_state.plaintext_bytes_to_read() > 0 {
+ let mut plaintext = vec![0u8; io_state.plaintext_bytes_to_read()];
+ self.tls_conn.reader().read_exact(&mut plaintext).unwrap();
+
+ // dbg!(self.now.elapsed());
+ println!("{}", "=======");
+ }
+
+ // If that fails, the peer might have started a clean TLS-level
+ // session closure.
+ if io_state.peer_has_closed() {
+ self.clean_closure = true;
+ self.closing = true;
+ }
+ }
+
+ fn do_write(&mut self) {
+ self.tls_conn.write_tls(&mut self.socket).unwrap();
+ }
+
+ /// Registers self as a 'listener' in mio::Registry
+ pub fn register(&mut self, registry: &mio::Registry) {
+ let interest = self.event_set();
+ registry
+ .register(&mut self.socket, CLIENT, interest)
+ .unwrap();
+ }
+
+ /// Reregisters self as a 'listener' in mio::Registry.
+ pub fn reregister(&mut self, registry: &mio::Registry) {
+ let interest = self.event_set();
+ registry
+ .reregister(&mut self.socket, CLIENT, interest)
+ .unwrap();
+ }
+
+ /// Use wants_read/wants_write to register for different mio-level
+ /// IO readiness events.
+ fn event_set(&self) -> mio::Interest {
+ let rd = self.tls_conn.wants_read();
+ let wr = self.tls_conn.wants_write();
+
+ if rd && wr {
+ mio::Interest::READABLE | mio::Interest::WRITABLE
+ } else if wr {
+ mio::Interest::WRITABLE
+ } else {
+ mio::Interest::READABLE
+ }
+ }
+
+ pub fn is_closed(&self) -> bool {
+ self.closing
+ }
+ }
+ impl io::Write for TlsClient {
+ fn write(&mut self, bytes: &[u8]) -> io::Result {
+ self.tls_conn.writer().write(bytes)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ self.tls_conn.writer().flush()
+ }
+ }
+
+ impl io::Read for TlsClient {
+ fn read(&mut self, bytes: &mut [u8]) -> io::Result {
+ self.tls_conn.reader().read(bytes)
+ }
+ }
+}