From d2e9d75b0e9ce2b1a3c05e7b4ae7216323b3a516 Mon Sep 17 00:00:00 2001 From: Myzel394 <50424412+Myzel394@users.noreply.github.com> Date: Sun, 28 Jan 2024 20:45:02 +0100 Subject: [PATCH] feat: Add first wip for parsers --- Cargo.lock | 40 +++++++++++++++++ Cargo.toml | 2 + src/client.rs | 16 +++++-- src/engines.rs | 2 + src/engines/duckduckgo.rs | 90 ++++++++++++++++++++++++++++++++++++++ src/engines/engine_base.rs | 13 ++++++ src/main.rs | 24 ++++++---- 7 files changed, 175 insertions(+), 12 deletions(-) create mode 100644 src/engines.rs create mode 100644 src/engines/duckduckgo.rs create mode 100644 src/engines/engine_base.rs diff --git a/Cargo.lock b/Cargo.lock index f5624b3..2d4582f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -537,6 +546,35 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "reqwest" version = "0.11.23" @@ -787,6 +825,8 @@ dependencies = [ name = "tcp_test" version = "0.1.0" dependencies = [ + "lazy_static", + "regex", "reqwest", "rustls", "tokio", diff --git a/Cargo.toml b/Cargo.toml index e92e71c..531d9da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,8 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +lazy_static = "1.4.0" +regex = "1.10.3" reqwest = "0.11.23" rustls = "0.22.2" tokio = "1.35.1" diff --git a/src/client.rs b/src/client.rs index af98868..29930da 100644 --- a/src/client.rs +++ b/src/client.rs @@ -58,7 +58,16 @@ pub mod client { "{} {} HTTP/1.1\r\n", "Host: {}\r\n", "Connection: close\r\n", - "Accept-Encoding: identity\r\n", + // "Accept-Encoding: identity\r\n", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36\r\n", + "Upgrade-Insecure-Requests: 1\r\n", + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8\r\n", + "Dnt: 1\r\n", + "Accept-Language: en-US,en;q=0.9\r\n", + "Content-Type: application/x-www-form-urlencoded\r\n", + "Content-Length: 6\r\n", + "\r\n", + "q=duck\r\n", "\r\n", ), method, @@ -70,13 +79,14 @@ pub mod client { pub fn request( &self, method: &str, - on_partial: fn(&[u8; PACKET_SIZE], &[u8]), + // on_partial: Box, ) -> Result, Box> { let mut connection = self.create_connection()?; let mut sock = self.create_tcp_stream()?; let mut tls = rustls::Stream::new(&mut connection, &mut sock); let http_header = self.create_http_header(method)?; + dbg!(&http_header); tls.write_all(&http_header.as_bytes())?; // Read packages one by one @@ -86,7 +96,7 @@ pub mod client { let n = tls.read(&mut buf)?; - on_partial(&buf, &data); + // (on_partial)(&buf, &data); if n == 0 { break; diff --git a/src/engines.rs b/src/engines.rs new file mode 100644 index 0000000..d55bb15 --- /dev/null +++ b/src/engines.rs @@ -0,0 +1,2 @@ +pub mod duckduckgo; +pub mod engine_base; diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs new file mode 100644 index 0000000..8b30fe2 --- /dev/null +++ b/src/engines/duckduckgo.rs @@ -0,0 +1,90 @@ +// Search engine parser for DuckDuckGo +pub mod duckduckgo { + // Results start at: + //