From e1bd09df44509090c850a247a25d27829f226108 Mon Sep 17 00:00:00 2001 From: Myzel394 <50424412+Myzel394@users.noreply.github.com> Date: Sun, 18 Feb 2024 21:50:51 +0100 Subject: [PATCH] feat: Add first real working wip for brave; With real real time processing --- .gitignore | 2 + Cargo.lock | 218 ++- Cargo.toml | 5 +- src/engines.rs | 1 + src/engines/brave.rs | 109 ++ src/engines/brave_example.html | 110 ++ src/engines/brave_example.prettified.html | 1858 +++++++++++++++++++++ src/engines/duckduckgo.rs | 294 ++-- src/engines/engine_base.rs | 45 +- src/main.rs | 197 ++- src/tsclient.rs | 174 ++ 11 files changed, 2884 insertions(+), 129 deletions(-) create mode 100644 src/engines/brave.rs create mode 100644 src/engines/brave_example.html create mode 100644 src/engines/brave_example.prettified.html create mode 100644 src/tsclient.rs diff --git a/.gitignore b/.gitignore index 158e489..42fcfb8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target build + +./tailwindcss/node_modules/* diff --git a/Cargo.lock b/Cargo.lock index 73147ed..ac5f3c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,6 +80,32 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "aws-lc-rs" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df33e4a55b03f8780ba55041bc7be91a2a8ec8c03517b0379d2d6c96d2c30d95" +dependencies = [ + "aws-lc-sys", + "mirai-annotations", + "paste", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5f269b176dc4aeb593910fa56ed6f956cde19542e496bb0bfc1ad9a6ce18815" +dependencies = [ + "bindgen", + "cmake", + "dunce", + "fs_extra", + "libc", + "paste", +] + [[package]] name = "backtrace" version = "0.3.69" @@ -107,6 +133,29 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "383d29d513d8764dcdc42ea295d979eb99c3c9f00607b3692cf68a431f7dca72" +[[package]] +name = "bindgen" +version = "0.69.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.4.2", + "cexpr", + "clang-sys", + "itertools", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -146,12 +195,41 @@ dependencies = [ "libc", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + [[package]] name = "cookie" version = "0.18.0" @@ -221,6 +299,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dunce" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" + [[package]] name = "either" version = "1.9.0" @@ -302,6 +386,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" version = "0.3.30" @@ -458,6 +548,15 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "http" version = "0.2.11" @@ -573,6 +672,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.10" @@ -617,12 +725,28 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "linux-raw-sys" version = "0.4.13" @@ -681,6 +805,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -697,10 +827,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", + "log", "wasi", "windows-sys 0.48.0", ] +[[package]] +name = "mirai-annotations" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" + [[package]] name = "multer" version = "2.1.0" @@ -739,6 +876,16 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -847,6 +994,12 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "pear" version = "0.2.8" @@ -906,6 +1059,16 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "prettyplease" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.78" @@ -1181,6 +1344,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.38.30" @@ -1196,12 +1365,11 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" +version = "0.23.0-alpha.0" dependencies = [ + "aws-lc-rs", "log", - "ring", + "once_cell", "rustls-pki-types", "rustls-webpki", "subtle", @@ -1209,17 +1377,28 @@ dependencies = [ ] [[package]] -name = "rustls-pki-types" -version = "1.1.0" +name = "rustls-pemfile" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a" +checksum = "3c333bb734fcdedcea57de1602543590f545f127dc8b533324318fd492c5c70b" +dependencies = [ + "base64", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "048a63e5b3ac996d78d402940b5fa47973d2d080c6c6fffa1d0f19c4445310b7" [[package]] name = "rustls-webpki" -version = "0.102.1" +version = "0.102.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4ca26037c909dedb327b48c3327d0ba91d3dd3c4e05dad328f210ffb68e95b" +checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -1342,6 +1521,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -1443,13 +1628,16 @@ name = "tcp_test" version = "0.1.0" dependencies = [ "async-trait", + "bytes", "futures", "lazy-regex", "lazy_static", + "mio", "regex", "reqwest", "rocket", "rustls", + "rustls-pemfile", "tokio", "urlencoding", "webpki-roots", @@ -1894,6 +2082,18 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 129dd83..a6f4370 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,13 +7,16 @@ edition = "2021" [dependencies] async-trait = "0.1.77" +bytes = "1.5.0" futures = "0.3.30" lazy-regex = "3.1.0" lazy_static = "1.4.0" +mio = { version = "0.8.10", features = ["net", "os-poll", ] } regex = "1.10.3" reqwest = { version = "0.11.23", features = ["stream"] } rocket = "0.5.0" -rustls = "0.22.2" +rustls = { path = "../rustls/rustls", features = ["logging"] } +rustls-pemfile = "2" tokio = {version = "1.35.1", features = ["full"]} urlencoding = "2.1.3" webpki-roots = "0.26.0" diff --git a/src/engines.rs b/src/engines.rs index d55bb15..9311cee 100644 --- a/src/engines.rs +++ b/src/engines.rs @@ -1,2 +1,3 @@ +pub mod brave; pub mod duckduckgo; pub mod engine_base; diff --git a/src/engines/brave.rs b/src/engines/brave.rs new file mode 100644 index 0000000..85128a8 --- /dev/null +++ b/src/engines/brave.rs @@ -0,0 +1,109 @@ +// Search engine parser for Brave Search +// This uses the clearnet, unlocalized version of the search engine. +pub mod brave { + use lazy_static::lazy_static; + use regex::Regex; + use urlencoding::decode; + + use crate::{ + engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult}, + utils::utils::decode_html_text, + }; + + lazy_static! { + static ref RESULTS_START: Regex = Regex::new(r#"(?P.+?))|(class="h.? svelte.+?<div class="title.+?>(?P<title2>.+?)<)).+?<!-- HTML_TAG_START -->(?P<description>.+?)<!-- HTML_TAG_END -->(?P<end>(.*?<\/div>( <\/div>){2})|(</div>.*?</div>.*?<div class="deep.*?data-sveltekit-reload.*?</div></div>.*?</div>))"#).unwrap(); + static ref STRIP: Regex = Regex::new(r"\s+").unwrap(); + static ref STRIP_HTML_TAGS: Regex = Regex::new(r#"<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>"#).unwrap(); + } + + pub struct Brave { + pub completed: bool, + results_started: bool, + pub previous_block: String, + pub results: Vec<SearchResult>, + } + + impl Brave { + fn slice_remaining_block(&mut self, start_position: &usize) { + let previous_block_bytes = self.previous_block.as_bytes().to_vec(); + let remaining_bytes = previous_block_bytes[*start_position..].to_vec(); + let remaining_text = String::from_utf8(remaining_bytes).unwrap(); + + self.previous_block.clear(); + self.previous_block.push_str(&remaining_text); + } + + pub fn new() -> Self { + Self { + results_started: false, + previous_block: String::new(), + results: vec![], + completed: false, + } + } + } + + impl EngineBase for Brave { + fn add_result(&mut self, result: crate::engines::engine_base::engine_base::SearchResult) { + self.results.push(result); + } + + fn parse_next<'a>(&mut self) -> Option<SearchResult> { + if self.results_started { + match SINGLE_RESULT.captures(&self.previous_block.to_owned()) { + Some(captures) => { + let title = decode( + captures + .name("title") + .unwrap_or_else(|| captures.name("title2").unwrap()) + .as_str(), + ) + .unwrap() + .into_owned(); + let description_raw = + decode_html_text(captures.name("description").unwrap().as_str()) + .unwrap(); + let description = STRIP_HTML_TAGS + .replace_all(&description_raw, "") + .into_owned(); + let url = decode(captures.name("url").unwrap().as_str()) + .unwrap() + .into_owned(); + + let result = SearchResult { + title, + description, + url, + engine: SearchEngine::DuckDuckGo, + }; + + let end_position = captures.name("end").unwrap().end(); + self.slice_remaining_block(&end_position); + + return Some(result); + } + None => {} + } + } + + None + } + + fn push_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) { + let bytes: Vec<u8> = packet.map(|bit| *bit).collect(); + let raw_text = String::from_utf8_lossy(&bytes); + let text = STRIP.replace_all(&raw_text, " "); + + if self.results_started { + self.previous_block.push_str(&text); + } else { + self.results_started = RESULTS_START.is_match(&text); + } + } + + async fn search(&mut self, query: &str) { + todo!() + } + } +} diff --git a/src/engines/brave_example.html b/src/engines/brave_example.html new file mode 100644 index 0000000..777250e --- /dev/null +++ b/src/engines/brave_example.html @@ -0,0 +1,110 @@ +<!doctype html> +<html class="no-js " lang="en-us"> + <head> + <meta charset="utf-8" /> + <link href="https://cdn.search.brave.com" rel="preconnect" crossorigin="anonymous" /> +<link href="https://imgs.search.brave.com" rel="preconnect" crossorigin="anonymous" /> +<link href="https://tiles.search.com" rel="preconnect" crossorigin="anonymous" /> +<link rel="preload" as="font" type="font/woff2" href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/inter-latin-400-normal.GLYHyz0Z.woff2" crossorigin /> +<link rel="preload" as="font" type="font/woff2" href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/inter-latin-600-normal.bznh0S3M.woff2" crossorigin /> +<link rel="preload" as="font" type="font/woff2" href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/poppins_latin-normal-500.vDl5Y2SW.woff2" crossorigin /> +<link rel="preload" as="font" type="font/woff2" href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/poppins_latin-normal-600.MxJMQfTK.woff2" crossorigin /> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/0.DXSBl-wJ.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/button.wIyOA3gE.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Modal.-i1kEUmE.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Suggest.zfotXO53.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/BraveSupporter.5mhaV9Hv.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/FeedbackModal._TszuskL.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/ContextMenuPane.NYrf_5AU.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/DSButton.rAkTeV5Q.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Switch.0RGFano1.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Footer.CetyMPc9.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/ColumnLayout.TuYO-6_z.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/19.NUikwCju.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/ServiceToggle._Kh8cIYh.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.wjF_fOyR.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/ManualLocationPicker.KN6n6oxR.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/index.Yqwcmvzi.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/BaseResult.3yboMBOj.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Favicon.ndqyAq3m.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/CopyToClipboardButton.R6_f_z0L.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/InfoBanners.d9Vhyc__.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/FallbackSnippet.8vh-gdhz.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/ExternalSearchButton.hgx49xBG.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/MakeDefault.cDgdngMw.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/ShoppingCarouselAd.Jmlr4hyR.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/SimpleAd.OS_bctSg.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.KnMtDj_K.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/FullView.0uy-Gl-J.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/OpeningHours.lpwkzhJZ.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/PlaceRatingBlock.0xudcZbr.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/ReviewsSection.x3Ien6GL.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/LinkChip.D1gkXdOu.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/AccordionItem.TQxLbO_A.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.6IW7bmZC.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.znE95HLC.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.TfdDdzXf.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.eiCTqTJc.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.MRz2loeA.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.os-s65tj.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Generic.871SxTMJ.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/LongDescriptionSection.ebqRA5Oo.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Place.BO-WXg1l.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Codebox.OafieyAL.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.HymgDlyA.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.neI0LfX_.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/uPlotChart.eEHkYHVT.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/uPlot.ah9oG4mY.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.LYl3xyrr.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.kW7SrKiV.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/WidgetControl.uTDU44oi.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.YOHSSPKH.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.iZaWlI5n.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.g37qRcn7.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.Ha7omtJ9.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.pwu-NRbc.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.GMleSwUP.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.QoPwxEUf.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.UG--FqWQ.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.shLPu_mo.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/Image.pZxxQwB_.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.mM78D1QY.css" rel="stylesheet"> + <link href="https://cdn.search.brave.com/serp/v2/_app/immutable/assets/App.pYHF63Q9.css" rel="stylesheet"><title>test - Brave Search + + + +
test
tĕst
noun
  1. A hard external covering, as that of certain amoebas, dinoflagellates, and sea urchins.
  2. A procedure for critical evaluation; a means of determining the presence, quality, or truth of something; a trial.
  3. A series of questions, problems, or physical responses designed to determine knowledge, intelligence, or ability.
from The American Heritage® Dictionary of the English Language, 5th Edition. More at Wordnik
🌐
Merriam-Webster
merriam-webster.com › dictionary › test
Test Definition & Meaning - Merriam-Webster
August 12, 2023 - a means of testing: such as… See the full definition
🌐
Speedtest
speedtest.net
Speedtest by Ookla - The Global Broadband Speed Test
May 30, 2023 - Use Speedtest on all your devices with our free desktop and mobile apps.
🌐
Cambridge Dictionary
dictionary.cambridge.org › us › dictionary › english › test
TEST | definition in the Cambridge English Dictionary
August 2, 2023 - TEST meaning: 1. a way of discovering, by questions or practical activities, what someone knows, or what someone…. Learn more.
🌐
Wikipedia
en.wikipedia.org › wiki › Test
Test - Wikipedia
January 6, 2024 - Look up test, testing, Test, or TEST in Wiktionary, the free dictionary. ... Test (assessment), an educational assessment intended to measure the respondents' knowledge or other abilities ... Search for "Test" on Wikipedia. Tester (disambiguation), person or device that tests or measures
Find elsewhere
🌐
TimesNow
timesnownews.com › sports › cricket
Yashasvi Jaiswal Becomes First Player To Score Consecutive Test Double Hundreds But Not Win Man of the Match award | Cricket News, Times Now
2 hours ago - Yashasvi Jaiswal smashed a record-breaking knock of 214 in the third Test against England in Rajkot. Jaiswal is only the third Indian in history to score back-to-back double tons in Test cricket. Jaiswal achieved a rare feat with these consecutive double centuries.
🌐
SpringerLink
link.springer.com › home › test
Home | TEST
February 25, 2023 - Journal TEST focuses on cutting-edge research in all areas of Statistics and Probability with significant theoretical, methodological, and applied ...
🌐
FAST
fast.com
Internet Speed Test | Fast.com
How fast is your download speed? In seconds, FAST.com's simple Internet speed test will estimate your ISP speed.
🌐
Twitter
twitter.com › search
test on X
Explore Twitter's latest discussions on test
🌐
Collins Dictionary
collinsdictionary.com › us › dictionary › english › test
TEST definition in American English | Collins English Dictionary
14 senses: 1. to ascertain (the worth, capability, or endurance) of (a person or thing) by subjection to certain examinations;.... Click for more definitions.
Published: November 11, 2020
🌐
Merriam-Webster
merriam-webster.com › thesaurus › test
TEST Synonyms: 80 Similar Words | Merriam-Webster Thesaurus
Synonyms for TEST: experiment, experimentation, trial, try, essay, effort, practice, attempt, practise, exercise
🌐
Europa
health.ec.europa.eu › health-security-and-infectious-diseases › crisis-management › covid-19-diagnostic-tests_en
COVID-19 diagnostic tests
In the context of the COVID-19 pandemic, the Health Security Committee established in May 2021 a Technical working group on COVID-19 diagnostic tests
🌐
CDC
cdc.gov › coronavirus › 2019-ncov › testing › self-testing.html
Self-Testing At Home or Anywhere | CDC
May 11, 2023 - Learn how self-testing and collection allow you to collect a specimen at home and either send it to a testing facility or perform the test at home.
🌐
Harvard
implicit.harvard.edu › implicit › takeatest.html
Take a Test
March 24, 2023 - On the next page you'll be asked to select an Implicit Association Test (IAT) from a list of possible topics . We will also ask you (optionally) to report your attitudes or beliefs about these topics and provide some information about yourself.
🌐
CDC
cdc.gov › coronavirus › 2019-ncov › symptoms-testing › testing.html
COVID-19 Testing: What You Need to Know | CDC
July 13, 2023 - Learn about testing for COVID-19, including the types of tests, who should get tested, how to get tested, and print resources in English and Spanish.
🌐
BuzzFeed
buzzfeed.com › quizzes
BuzzFeed Quizzes
June 25, 2023 - We've got all the quizzes you love to binge! Come on in and hunker down for the long haul.
+ + +
+ + diff --git a/src/engines/brave_example.prettified.html b/src/engines/brave_example.prettified.html new file mode 100644 index 0000000..550d2ca --- /dev/null +++ b/src/engines/brave_example.prettified.html @@ -0,0 +1,1858 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + test - Brave Search + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
test
+
tĕst
+
+
+
noun
+
    +
  1. A hard external covering, + as that of certain amoebas, dinoflagellates, and sea + urchins.
  2. +
  3. A procedure for critical + evaluation; a means of determining the presence, quality, or truth of + something; a trial.
  4. +
  5. A series of questions, + problems, or physical responses designed to determine knowledge, + intelligence, or ability.
  6. + + + +
+
+ +
from The American Heritage® Dictionary of the English + Language, 5th Edition. More at Wordnik
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+ + + +
+
+
+
🌐
+
+
Speedtest
speedtest.net +
+
+
Speedtest by Ookla - The Global Broadband Speed + Test
+
+
+
+
May 30, 2023 - + Use Speedtest on all your devices with our free desktop + and mobile apps.
+
+
+ + +
+
+
+
🌐
+
+
Cambridge Dictionary
dictionary.cambridge.org + › us › dictionary › english › + test +
+
+
TEST | definition in the Cambridge English + Dictionary
+
+
+
+
August 2, 2023 - + TEST meaning: 1. a way of discovering, + by questions or practical activities, what someone knows, or what someone…. + Learn more.
+
+
+
+
+
+
🌐
+
+
Wikipedia
en.wikipedia.org › wiki › Test +
+
+
Test - Wikipedia
+
+
+
+
January 6, 2024 - + Look up test, testing, + Test, or TEST in Wiktionary, + the free dictionary. ... Test (assessment), an educational + assessment intended to measure the respondents' knowledge or other abilities ... + Search for "Test" on Wikipedia. Tester (disambiguation), person + or device that tests or measures
+
+ +
+
+
+
+ + Find elsewhere
+ +
+
+
+
+
+
🌐
+
+
TimesNow
timesnownews.com › sports › cricket + +
+
+
Yashasvi Jaiswal Becomes First Player To Score + Consecutive Test Double Hundreds But Not Win Man of the Match award | + Cricket News, Times Now
+
+
+
+
2 hours ago - + Yashasvi Jaiswal smashed a record-breaking knock of 214 + in the third Test against England in Rajkot. Jaiswal is only + the third Indian in history to score back-to-back double tons in + Test cricket. Jaiswal achieved a rare feat with these + consecutive double centuries.
+
+
+
+
+
+
🌐
+
+
SpringerLink
link.springer.com › home › test +
+
+
Home | TEST
+
+
+
+
February 25, 2023 - + Journal TEST focuses on cutting-edge + research in all areas of Statistics and Probability with significant + theoretical, methodological, and applied ...
+
+
+
+
+
+
🌐
+
+
FAST
fast.com +
+
+
Internet Speed Test | Fast.com
+
+
+
+
How + fast is your download speed? In seconds, FAST.com's simple Internet speed + test will estimate your ISP speed.
+
+
+
+
+
+
🌐
+
+
Twitter
twitter.com › search +
+
+
test on X
+
+
+
+
+ Explore Twitter's latest discussions on + test
+
+
+
+
+
+
🌐
+
+
Collins Dictionary
collinsdictionary.com › us › dictionary › english › + test +
+
+
TEST definition in American English | Collins + English Dictionary
+
+
+
 +
+ +
+
+
+ 14 senses: 1. to ascertain (the worth, capability, or + endurance) of (a person or thing) by subjection to certain examinations;.... + Click for more definitions.
+
+
Published: November 11, 2020
+
+
+
+
+
+
+
🌐
+
+
Merriam-Webster
merriam-webster.com › thesaurus › test + +
+
+
TEST Synonyms: 80 Similar Words | + Merriam-Webster Thesaurus
+
+
+
+
+ Synonyms for TEST: experiment, + experimentation, trial, try, essay, effort, practice, attempt, practise, + exercise
+
+
+
+
+
+
🌐
+
+
Europa
health.ec.europa.eu › + health-security-and-infectious-diseases › crisis-management › + covid-19-diagnostic-tests_en +
+
+
COVID-19 diagnostic tests
+
+
+
+
In + the context of the COVID-19 pandemic, the Health Security Committee established + in May 2021 a Technical working group on COVID-19 diagnostic + tests
+
+
+
+
+
+
🌐
+
+
CDC
cdc.gov › coronavirus › 2019-ncov › + testing › self-testing.html +
+
+
Self-Testing At Home or Anywhere | CDC
+
+
+
+
May 11, 2023 - + Learn how self-testing and collection + allow you to collect a specimen at home and either send it to a + testing facility or perform the test at + home.
+
+
+
+
+
+
🌐
+
+
Harvard
implicit.harvard.edu › implicit › + takeatest.html +
+
+
Take a Test
+
+
+
+
March 24, 2023 - + On the next page you'll be asked to select an Implicit + Association Test (IAT) from a list of possible topics . We will + also ask you (optionally) to report your attitudes or beliefs about these topics + and provide some information about yourself.
+
+
+
+
+
+
🌐
+
+
CDC
cdc.gov › coronavirus › 2019-ncov › + symptoms-testing › testing.html +
+
+
COVID-19 Testing: What You Need to Know | CDC +
+
+
+
+
July 13, 2023 - + Learn about testing for COVID-19, + including the types of tests, who should get + tested, how to get tested, and print resources + in English and Spanish.
+
+
+
+
+
+
🌐
+
+
BuzzFeed
buzzfeed.com › quizzes +
+
+
BuzzFeed Quizzes
+
+
+
+
June 25, 2023 - + We've got all the quizzes you love to binge! Come on + in and hunker down for the long haul.
+
+
+ + +
+
+ +
+
+
+
+ + + +
+ + + diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs index 40da1c4..77cbad4 100644 --- a/src/engines/duckduckgo.rs +++ b/src/engines/duckduckgo.rs @@ -1,18 +1,17 @@ // Search engine parser for DuckDuckGo pub mod duckduckgo { use std::{ - pin::{pin, Pin}, - task::{Context, Poll}, + io::{Read, Write}, + net::TcpStream, + sync::Arc, }; - use async_trait::async_trait; - use futures::{AsyncRead, FutureExt, Stream, StreamExt}; use lazy_static::lazy_static; use regex::Regex; + use rustls::RootCertStore; use urlencoding::decode; use crate::{ - client::client::{Client, PACKET_SIZE}, engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult}, utils::utils::decode_html_text, }; @@ -72,107 +71,11 @@ pub mod duckduckgo { // } // } - pub struct DuckDuckGoSearchStream<'a> { - pub ddg: &'a DuckDuckGo, - } - - impl<'a> DuckDuckGoSearchStream<'a> { - pub fn new(ddg: &'a DuckDuckGo) -> Self { - Self { ddg } - } - } - - #[async_trait] - impl Stream for DuckDuckGoSearchStream<'_> { - type Item = SearchResult; - - fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Poll::Pending - } - } - - impl DuckDuckGo { - pub fn get_stream<'a>(&'a self) -> impl Stream + 'a { - DuckDuckGoSearchStream { ddg: self } + impl EngineBase for DuckDuckGo { + fn add_result(&mut self, result: SearchResult) { + self.results.push(result); } - fn slice_remaining_block(&mut self, start_position: &usize) { - let previous_block_bytes = self.previous_block.as_bytes().to_vec(); - let remaining_bytes = previous_block_bytes[*start_position..].to_vec(); - let remaining_text = String::from_utf8(remaining_bytes).unwrap(); - - self.previous_block.clear(); - self.previous_block.push_str(&remaining_text); - } - - pub fn new() -> Self { - Self { - callback: Box::new(|_: SearchResult| {}), - results_started: false, - previous_block: String::new(), - results: vec![], - completed: false, - } - } - - pub fn set_callback(&mut self, callback: CallbackType) { - self.callback = callback; - } - - pub async fn search(&mut self, query: &str) { - let client = reqwest::Client::new(); - - let mut stream = client - .post("https://html.duckduckgo.com/html/") - .header("Content-Type", "application/x-www-form-urlencoded") - .body(format!("q={}", query)) - .send() - .await - .unwrap() - .bytes_stream(); - - dbg!("before"); - while let Some(item) = stream.next().await { - let packet = item.unwrap(); - dbg!(packet.len()); - - if let Some(result) = self.parse_packet(packet.iter()) { - self.results.push(result); - } - } - dbg!("after"); - - while let Some(result) = self.parse_next() { - self.results.push(result); - } - dbg!("competed"); - - self.completed = true; - } - - pub fn push_packet<'a>(&mut self, packet: impl Iterator) { - let bytes: Vec = packet.map(|bit| *bit).collect(); - let raw_text = String::from_utf8_lossy(&bytes); - let text = STRIP.replace_all(&raw_text, " "); - - if self.results_started { - self.previous_block.push_str(&text); - } else { - self.results_started = RESULTS_START.is_match(&text); - } - } - - /// Push packet to internal block and return next available search result, if available - fn parse_packet<'a>( - &mut self, - packet: impl Iterator, - ) -> Option { - self.push_packet(packet); - - self.parse_next() - } - - /// Get next search result, if available, from internal stored block fn parse_next<'a>(&mut self) -> Option { if self.results_started { match SINGLE_RESULT.captures(&self.previous_block.to_owned()) { @@ -208,5 +111,188 @@ pub mod duckduckgo { None } + + fn push_packet<'a>(&mut self, packet: impl Iterator) { + let bytes: Vec = packet.map(|bit| *bit).collect(); + let raw_text = String::from_utf8_lossy(&bytes); + let text = STRIP.replace_all(&raw_text, " "); + + if self.results_started { + self.previous_block.push_str(&text); + } else { + self.results_started = RESULTS_START.is_match(&text); + } + } + + // Searches DuckDuckGo for the given query + // Uses rustls as reqwest does not support accessing the raw packets + async fn search(&mut self, query: &str) { + let root_store = + RootCertStore::from_iter(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); + let mut config = rustls::ClientConfig::builder() + .with_root_certificates(root_store) + .with_no_client_auth(); + + // Allow using SSLKEYLOGFILE. + config.key_log = Arc::new(rustls::KeyLogFile::new()); + + let now = std::time::Instant::now(); + let server_name = "html.duckduckgo.com".try_into().unwrap(); + let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap(); + let mut sock = TcpStream::connect("html.duckduckgo.com:443").unwrap(); + let mut tls = rustls::Stream::new(&mut conn, &mut sock); + tls.write_all( + concat!( + "POST /html/ HTTP/1.1\r\n", + "Host: html.duckduckgo.com\r\n", + "Connection: cloSe\r\n", + "Accept-Encoding: identity\r\n", + "Content-Length: 6\r\n", + // form data + "Content-Type: application/x-www-form-urlencoded\r\n", + "\r\n", + "q=test", + ) + .as_bytes(), + ) + .unwrap(); + let mut plaintext = Vec::new(); + dbg!(now.elapsed()); + + loop { + let mut buf = [0; 65535]; + tls.conn.complete_io(tls.sock); + let n = tls.conn.reader().read(&mut buf); + + if n.is_ok() { + dbg!(&n); + let n = n.unwrap(); + if n == 0 { + break; + } + println!("{}", "================="); + dbg!(now.elapsed()); + // println!("{}", String::from_utf8_lossy(&buf)); + plaintext.extend_from_slice(&buf); + } + } + + // let root_store = RootCertStore { + // roots: webpki_roots::TLS_SERVER_ROOTS.into(), + // }; + // + // let mut config = rustls::ClientConfig::builder() + // .with_root_certificates(root_store) + // .with_no_client_auth(); + // + // // Allow using SSLKEYLOGFILE. + // config.key_log = Arc::new(rustls::KeyLogFile::new()); + // + // let server_name = "html.duckduckgo.com".try_into().unwrap(); + // let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap(); + // + // let mut sock = TcpStream::connect("html.duckduckgo.com:443").unwrap(); + // let mut tls = rustls::Stream::new(&mut conn, &mut sock); + // tls.write_all( + // concat!( + // "POST /html/ HTTP/1.1\r\n", + // "Host: html.duckduckgo.com\r\n", + // "Connection: close\r\n", + // "Accept-Encoding: identity\r\n", + // "Content-Length: 6\r\n", + // // form data + // "Content-Type: application/x-www-form-urlencoded\r\n", + // "\r\n", + // "q=test", + // ) + // .as_bytes(), + // ) + // .unwrap(); + // let ciphersuite = tls.conn.negotiated_cipher_suite().unwrap(); + // writeln!( + // &mut std::io::stderr(), + // "Current ciphersuite: {:?}", + // ciphersuite.suite() + // ) + // .unwrap(); + // + // // Iterate over the stream to read the response. + // loop { + // let mut buf = [0u8; 1024]; + // let n = tls.read(&mut buf).unwrap(); + // if n == 0 { + // break; + // } + // + // if let Some(result) = self.parse_packet(buf.iter()) { + // self.add_result(result); + // + // // Wait one second + // std::thread::sleep(std::time::Duration::from_millis(100)); + // } + // } + // + // while let Some(result) = self.parse_next() { + // self.add_result(result); + // } + // + // dbg!("done with searching"); + + // let client = reqwest::Client::new(); + // + // let now = std::time::Instant::now(); + // + // let mut stream = client + // .post("https://html.duckduckgo.com/html/") + // .header("Content-Type", "application/x-www-form-urlencoded") + // .body(format!("q={}", query)) + // .send() + // .await + // .unwrap() + // .bytes_stream(); + // + // let diff = now.elapsed(); + // dbg!(diff); + // + // while let Some(item) = stream.next().await { + // let packet = item.unwrap(); + // + // if let Some(result) = self.parse_packet(packet.iter()) { + // self.add_result(result); + // } + // } + // + // while let Some(result) = self.parse_next() { + // self.add_result(result); + // } + // + // let second_diff = now.elapsed(); + // dbg!(second_diff); + } + } + + impl DuckDuckGo { + fn slice_remaining_block(&mut self, start_position: &usize) { + let previous_block_bytes = self.previous_block.as_bytes().to_vec(); + let remaining_bytes = previous_block_bytes[*start_position..].to_vec(); + let remaining_text = String::from_utf8(remaining_bytes).unwrap(); + + self.previous_block.clear(); + self.previous_block.push_str(&remaining_text); + } + + pub fn new() -> Self { + Self { + callback: Box::new(|_: SearchResult| {}), + results_started: false, + previous_block: String::new(), + results: vec![], + completed: false, + } + } + + pub fn set_callback(&mut self, callback: CallbackType) { + self.callback = callback; + } } } diff --git a/src/engines/engine_base.rs b/src/engines/engine_base.rs index 5f3a519..3503266 100644 --- a/src/engines/engine_base.rs +++ b/src/engines/engine_base.rs @@ -1,5 +1,14 @@ pub mod engine_base { - use async_trait::async_trait; + use bytes::Bytes; + + use futures::{Stream, StreamExt}; + use lazy_static::lazy_static; + use regex::Regex; + use reqwest::Error; + + lazy_static! { + static ref STRIP: Regex = Regex::new(r"\s+").unwrap(); + } #[derive(Clone, Copy, Debug, Hash)] pub enum SearchEngine { @@ -14,9 +23,39 @@ pub mod engine_base { pub engine: SearchEngine, } - #[async_trait] pub trait EngineBase { - fn parse_packet<'a>(&mut self, packet: impl Iterator); + fn add_result(&mut self, result: SearchResult); + + fn parse_next<'a>(&mut self) -> Option; + + fn push_packet<'a>(&mut self, packet: impl Iterator); + // fn push_packet<'a>(&mut self, packet: impl Iterator) { + // let bytes: Vec = packet.map(|bit| *bit).collect(); + // let raw_text = String::from_utf8_lossy(&bytes); + // let text = STRIP.replace_all(&raw_text, " "); + // + // if self.results_started { + // self.previous_block.push_str(&text); + // } else { + // self.results_started = RESULTS_START.is_match(&text); + // } + // } + + /// Push packet to internal block and return next available search result, if available + fn parse_packet<'a>( + &mut self, + packet: impl Iterator, + ) -> Option { + self.push_packet(packet); + + self.parse_next() + } + async fn search(&mut self, query: &str); } + + #[derive(Clone, Debug, Hash, Default)] + pub struct ResultsCollector { + results: Vec, + } } diff --git a/src/main.rs b/src/main.rs index 0eaeb48..cf0cf3f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,21 +19,28 @@ // } // Found no other way to make this work -#![feature(async_closure)] - +use crate::engines::engine_base::engine_base::EngineBase; +use std::io::{Read, Write}; +use std::net::TcpStream; +use std::str; use std::sync::Arc; +use std::time::Instant; +use engines::brave::brave::Brave; use futures::lock::Mutex; use lazy_static::lazy_static; -use rocket::response::{content::RawHtml, stream::TextStream}; - -use engines::duckduckgo::duckduckgo::DuckDuckGo; +use rocket::response::{ + content::{RawCss, RawHtml}, + stream::TextStream, +}; +use rustls::RootCertStore; use crate::static_files::static_files::read_file_contents; pub mod client; pub mod engines; pub mod static_files; +pub mod tsclient; pub mod utils; #[macro_use] @@ -63,21 +70,178 @@ fn search_get() -> &'static str { } #[get("/tailwind.css")] -fn get_tailwindcss() -> &'static str { - &TAILWIND_CSS +fn get_tailwindcss() -> RawCss<&'static str> { + RawCss(&TAILWIND_CSS) +} + +#[get("/slow")] +async fn slow() -> &'static str { + tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; + + "Slow" +} + +#[get("/slowresponse")] +async fn slowresponse() -> TextStream![String] { + TextStream! { + yield "First".to_owned(); + + tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; + + yield "second".to_owned(); + + tokio::time::sleep(tokio::time::Duration::from_secs(5)).await; + + yield "third".to_owned(); + } } #[get("/searchquery?")] async fn hello<'a>(query: &str) -> RawHtml { let query_box = Box::new(query.to_string()); + let now = Arc::new(Box::new(Instant::now())); - let ddg_ref = Arc::new(Mutex::new(DuckDuckGo::new())); + let completed_ref = Arc::new(Mutex::new(false)); + let completed_ref_writer = completed_ref.clone(); + let ddg_ref = Arc::new(Mutex::new(Brave::new())); let ddg_ref_writer = ddg_ref.clone(); - tokio::spawn(async move { - let mut ddg = ddg_ref_writer.lock().await; + let now_ref = now.clone(); - ddg.search(&query_box).await; + tokio::spawn(async move { + // let root_store = RootCertStore { + // roots: webpki_roots::TLS_SERVER_ROOTS.into(), + // }; + // + // let mut config = rustls::ClientConfig::builder() + // .with_root_certificates(root_store) + // .with_no_client_auth(); + // + // // Allow using SSLKEYLOGFILE. + // config.key_log = Arc::new(rustls::KeyLogFile::new()); + // + // let now = Instant::now(); + // let server_name = "html.duckduckgo.com".try_into().unwrap(); + // let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap(); + // + // let mut sock = TcpStream::connect("html.duckduckgo.com:443".to_socket_addrs()).unwrap(); + // let mut tls = rustls::Stream::new(&mut conn, &mut sock); + // tls.write_all( + // concat!( + // "POST /html/ HTTP/1.1\r\n", + // "Host: html.duckduckgo.com\r\n", + // "Connection: close\r\n", + // "Accept-Encoding: identity\r\n", + // "Content-Length: 6\r\n", + // // form data + // "Content-Type: application/x-www-form-urlencoded\r\n", + // "\r\n", + // "q=test", + // ) + // .as_bytes(), + // ) + // .unwrap(); + + // dbg!("Connected to DuckDuckGo"); + // dbg!(now.elapsed()); + // + // // Iterate over the stream to read the response in real time + // + // loop { + // if conn.wants_read() { + // conn.read_tls(&mut sock).unwrap(); + // conn.process_new_packets().unwrap(); + // + // let mut plaintext = Vec::new(); + // conn.reader().read_to_end(&mut plaintext).unwrap(); + // } + // + // if conn.wants_write() { + // conn.write_tls(&mut sock).unwrap(); + // } + // sock.wa + // } + + // loop { + // dbg!(now.elapsed()); + // let mut buf = [0u8; 1024]; + // let n = tls.read(&mut buf).unwrap(); + // if n == 0 { + // break; + // } + // + // dbg!(now.elapsed()); + // + // let mut ddg = ddg_ref_writer.lock().await; + // if let Some(result) = ddg.parse_packet(buf.iter()) { + // ddg.add_result(result); + // } + // + // // Release + // drop(ddg); + // tokio::task::yield_now().await; + // } + + // dbg!("done with content"); + // dbg!(now.elapsed()); + // + // let mut ddg = ddg_ref_writer.lock().await; + // while let Some(result) = ddg.parse_next() { + // ddg.add_result(result); + // } + + let root_store = RootCertStore::from_iter(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); + let mut config = rustls::ClientConfig::builder() + .with_root_certificates(root_store) + .with_no_client_auth(); + + // Allow using SSLKEYLOGFILE. + config.key_log = Arc::new(rustls::KeyLogFile::new()); + + let server_name = "search.brave.com".try_into().unwrap(); + let mut conn = rustls::ClientConnection::new(Arc::new(config), server_name).unwrap(); + let mut sock = TcpStream::connect("search.brave.com:443").unwrap(); + let mut tls = rustls::Stream::new(&mut conn, &mut sock); + tls.write_all( + concat!( + "GET /search?q=test&show_local=0&source=unlocalise HTTP/1.1\r\n", + "Host: search.brave.com\r\n", + "Connection: close\r\n", + "Accept-Encoding: identity\r\n", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.3\r\n", + "\r\n", + ) + .as_bytes(), + ) + .unwrap(); + + loop { + let mut buf = [0; 65535]; + tls.conn.complete_io(tls.sock); + let n = tls.conn.reader().read(&mut buf); + + // dbg!(&n); + + if n.is_ok() { + let n = n.unwrap(); + if n == 0 { + break; + } + // println!("{}", String::from_utf8_lossy(&buf)); + let mut brave = ddg_ref_writer.lock().await; + + if let Some(result) = brave.parse_packet(buf.iter()) { + println!("Brave: {}", now_ref.elapsed().as_millis()); + brave.add_result(result); + + drop(brave); + tokio::task::yield_now().await; + } + } + } + + let mut completed = completed_ref_writer.lock().await; + *completed = true; }); let mut current_index = 0; @@ -91,20 +255,27 @@ async fn hello<'a>(query: &str) -> RawHtml { let len = ddg.results.len(); if len == 0 { + drop(ddg); + tokio::task::yield_now().await; continue } - if ddg.completed && current_index == len - 1 { + let completed = completed_ref.lock().await; + if *completed && current_index == len - 1 { break } + drop(completed); for ii in (current_index + 1)..len { let result = ddg.results.get(ii).unwrap(); + println!("Yield: {}", now.elapsed().as_millis()); let text = format!("
  • {}

    {}

  • ", &result.title, &result.description); yield text.to_string(); } + drop(ddg); + tokio::task::yield_now().await; // [1] -> 0 // 1 -> [1] @@ -122,4 +293,6 @@ async fn rocket() -> _ { .mount("/", routes![hello]) .mount("/", routes![search_get]) .mount("/", routes![get_tailwindcss]) + .mount("/", routes![slow]) + .mount("/", routes![slowresponse]) } diff --git a/src/tsclient.rs b/src/tsclient.rs new file mode 100644 index 0000000..87ba9e8 --- /dev/null +++ b/src/tsclient.rs @@ -0,0 +1,174 @@ +pub mod tsclient { + use std::io::{self, BufReader, Read, Write}; + use std::net::ToSocketAddrs; + use std::sync::Arc; + use std::time::Instant; + use std::{fs, process, str}; + + use mio::net::TcpStream; + + use rustls::pki_types::{CertificateDer, PrivateKeyDer, ServerName}; + use rustls::RootCertStore; + + const CLIENT: mio::Token = mio::Token(0); + + /// This encapsulates the TCP-level connection, some connection + /// state, and the underlying TLS-level session. + pub struct TlsClient { + socket: TcpStream, + closing: bool, + clean_closure: bool, + tls_conn: rustls::ClientConnection, + now: Instant, + } + + impl TlsClient { + pub fn new( + sock: TcpStream, + server_name: ServerName<'static>, + cfg: Arc, + ) -> Self { + Self { + socket: sock, + closing: false, + clean_closure: false, + tls_conn: rustls::ClientConnection::new(cfg, server_name).unwrap(), + now: Instant::now(), + } + } + + /// Handles events sent to the TlsClient by mio::Poll + pub fn ready(&mut self, ev: &mio::event::Event) { + assert_eq!(ev.token(), CLIENT); + + if ev.is_readable() { + self.do_read(); + } + + if ev.is_writable() { + self.do_write(); + } + + if self.is_closed() { + println!("Connection closed"); + } + } + + fn read_source_to_end(&mut self, rd: &mut dyn io::Read) -> io::Result { + let mut buf = Vec::new(); + let len = rd.read_to_end(&mut buf)?; + self.tls_conn.writer().write_all(&buf).unwrap(); + Ok(len) + } + + /// We're ready to do a read. + fn do_read(&mut self) { + // Read TLS data. This fails if the underlying TCP connection + // is broken. + match self.tls_conn.read_tls(&mut self.socket) { + Err(error) => { + if error.kind() == io::ErrorKind::WouldBlock { + return; + } + println!("TLS read error: {:?}", error); + self.closing = true; + return; + } + + // If we're ready but there's no data: EOF. + Ok(0) => { + self.closing = true; + self.clean_closure = true; + return; + } + + Ok(_) => {} + }; + + // Reading some TLS data might have yielded new TLS + // messages to process. Errors from this indicate + // TLS protocol problems and are fatal. + let io_state = match self.tls_conn.process_new_packets() { + Ok(io_state) => io_state, + Err(err) => { + println!("TLS error: {:?}", err); + self.closing = true; + return; + } + }; + + // Having read some TLS data, and processed any new messages, + // we might have new plaintext as a result. + // + // Read it and then write it to stdout. + if io_state.plaintext_bytes_to_read() > 0 { + let mut plaintext = vec![0u8; io_state.plaintext_bytes_to_read()]; + self.tls_conn.reader().read_exact(&mut plaintext).unwrap(); + + // dbg!(self.now.elapsed()); + println!("{}", "======="); + } + + // If that fails, the peer might have started a clean TLS-level + // session closure. + if io_state.peer_has_closed() { + self.clean_closure = true; + self.closing = true; + } + } + + fn do_write(&mut self) { + self.tls_conn.write_tls(&mut self.socket).unwrap(); + } + + /// Registers self as a 'listener' in mio::Registry + pub fn register(&mut self, registry: &mio::Registry) { + let interest = self.event_set(); + registry + .register(&mut self.socket, CLIENT, interest) + .unwrap(); + } + + /// Reregisters self as a 'listener' in mio::Registry. + pub fn reregister(&mut self, registry: &mio::Registry) { + let interest = self.event_set(); + registry + .reregister(&mut self.socket, CLIENT, interest) + .unwrap(); + } + + /// Use wants_read/wants_write to register for different mio-level + /// IO readiness events. + fn event_set(&self) -> mio::Interest { + let rd = self.tls_conn.wants_read(); + let wr = self.tls_conn.wants_write(); + + if rd && wr { + mio::Interest::READABLE | mio::Interest::WRITABLE + } else if wr { + mio::Interest::WRITABLE + } else { + mio::Interest::READABLE + } + } + + pub fn is_closed(&self) -> bool { + self.closing + } + } + impl io::Write for TlsClient { + fn write(&mut self, bytes: &[u8]) -> io::Result { + self.tls_conn.writer().write(bytes) + } + + fn flush(&mut self) -> io::Result<()> { + self.tls_conn.writer().flush() + } + } + + impl io::Read for TlsClient { + fn read(&mut self, bytes: &mut [u8]) -> io::Result { + self.tls_conn.reader().read(bytes) + } + } +}