feat: Add first wip for http server

This commit is contained in:
Myzel394 2024-02-03 21:35:50 +01:00
parent 0a5a846b7b
commit 78a87f207c
No known key found for this signature in database
GPG Key ID: 79CC92F37B3E1A2B
5 changed files with 273 additions and 123 deletions

27
Cargo.lock generated
View File

@ -310,6 +310,7 @@ checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
@ -332,12 +333,34 @@ version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
[[package]]
name = "futures-executor"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
[[package]]
name = "futures-macro"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.30"
@ -359,6 +382,7 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
@ -1416,6 +1440,8 @@ dependencies = [
name = "tcp_test"
version = "0.1.0"
dependencies = [
"async-trait",
"futures",
"lazy-regex",
"lazy_static",
"regex",
@ -1505,6 +1531,7 @@ dependencies = [
"libc",
"mio",
"num_cpus",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",

View File

@ -6,12 +6,14 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-trait = "0.1.77"
futures = "0.3.30"
lazy-regex = "3.1.0"
lazy_static = "1.4.0"
regex = "1.10.3"
reqwest = "0.11.23"
rocket = "0.5.0"
rustls = "0.22.2"
tokio = "1.35.1"
tokio = {version = "1.35.1", features = ["full"]}
urlencoding = "2.1.3"
webpki-roots = "0.26.0"

View File

@ -1,46 +1,23 @@
// Search engine parser for DuckDuckGo
pub mod duckduckgo {
use lazy_regex::regex_replace_all;
// Results start at:
// <div id="links" class="results">
// Example for a result:
// <div class="result results_links results_links_deep web-result ">
// <div class="links_main links_deep result__body">
// <h2 class="result__title">
// <a
// rel="nofollow" class="result__a"
// href="https://www.speedtest.net/">
// Speedtest by Ookla - The Global Broadband Speed Test
// </a>
// </h2>
// <div class="result__extras">
// <div class="result__extras__url">
// <span class="result__icon">
// <a rel="nofollow" href="https://www.speedtest.net/">
// <img class="result__icon__img" width="16" height="16" alt=""
// src="//external-content.duckduckgo.com/ip3/www.speedtest.net.ico" name="i15" />
// </a>
// </span>
// <a class="result__url" href="https://www.speedtest.net/">
// www.speedtest.net
// </a>
// </div>
// </div>
// <a
// class="result__snippet"
// href="https://www.speedtest.net/">
// Use Speedtest on all your devices with our free desktop and mobile apps.
// </a>
// <div class="clear"></div>
// </div>
// </div>
use std::{
cmp::min,
collections::VecDeque,
pin::Pin,
str::Bytes,
task::{Context, Poll},
};
use async_trait::async_trait;
use futures::Stream;
use lazy_static::lazy_static;
use regex::Regex;
use urlencoding::decode;
use crate::{
engines::engine_base::engine_base::{EngineBase, SearchResult},
utils::utils::{decode_html_text, replace_html_unicode},
client::client::{Client, PACKET_SIZE},
engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult},
utils::utils::decode_html_text,
};
lazy_static! {
@ -50,10 +27,15 @@ pub mod duckduckgo {
static ref STRIP_HTML_TAGS: Regex = Regex::new(r#"<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>"#).unwrap();
}
/// Boxed closure that receives one parsed [`SearchResult`] at a time.
pub type CallbackType = Box<dyn FnMut(SearchResult) + Send + Sync>;
/// Parser state for one DuckDuckGo search.
pub struct DuckDuckGo {
// Returned by reference from `get_search_results`.
pub search_results: Vec<SearchResult>,
// Replaceable through `set_callback`; `new` installs a closure that does nothing.
callback: CallbackType,
// Set to `true` at the end of `search`.
pub completed: bool,
// Set once `RESULTS_START` matches the text of a packet.
results_started: bool,
// Text that `SINGLE_RESULT` is matched against; after a hit it is replaced by
// the remainder that follows the match.
previous_block: String,
// Parsed results, appended by `parse_packet` in the order they are found.
// Holds all results until consumed by iterator
pub results: VecDeque<SearchResult>,
}
impl DuckDuckGo {
@ -65,14 +47,86 @@ pub mod duckduckgo {
self.previous_block.clear();
self.previous_block.push_str(&remaining_text);
}
pub fn new() -> Self {
Self {
callback: Box::new(|_: SearchResult| {}),
results_started: false,
previous_block: String::new(),
results: VecDeque::new(),
completed: false,
}
}
/// Replaces the stored result callback with `callback`.
///
/// NOTE(review): every call site of `self.callback` in the visible source is
/// commented out, so the callback is stored but does not appear to be invoked.
pub fn set_callback(&mut self, callback: CallbackType) {
self.callback = callback;
}
}
// impl Stream for DuckDuckGo {
// type Item = String;
//
// fn poll_next(
// self: Pin<&mut Self>,
// cx: &mut Context<'_>,
// ) -> std::task::Poll<Option<Self::Item>> {
// if self.results.len() > 0 {
// let result = &mut self.results.pop_front().unwrap();
//
// let html = format!("<br><h2>{}</h2><p>{}</p>", result.title, result.description);
//
// return Poll::Ready(Some(html));
// }
//
// if self.completed {
// return Poll::Ready(None);
// }
//
// Poll::Pending
// }
// }
// impl Iterator for DuckDuckGo {
// type Item = SearchResult;
//
// fn next(&mut self) -> Option<SearchResult> {
// if self.results.len() > 0 {
// let oldest = self.results.pop_front().unwrap();
//
// Some(oldest)
// } else {
// None
// }
// }
// }
#[async_trait]
impl EngineBase for DuckDuckGo {
fn get_search_results(&self) -> &Vec<SearchResult> {
&self.search_results
/// Requests the DuckDuckGo HTML endpoint and passes the response to
/// `parse_packet` piece by piece, then marks the engine as completed.
///
/// NOTE(review): `query` is not referenced anywhere in this body, so the
/// search term does not appear to reach the request from here — confirm how
/// the request is meant to carry it.
fn search(&mut self, query: &str) {
// NOTE(review): debug output left in.
dbg!("searching duckduckgo");
let client = Client::new("https://html.duckduckgo.com/html/");
// `unwrap` panics if `request` does not produce a value.
let packets = client.request(&"POST").unwrap();
// Walk the response in windows of at most `PACKET_SIZE` items; `min` keeps
// the last window inside the buffer.
for ii in (0..packets.len()).step_by(PACKET_SIZE) {
let end_range = min(packets.len(), ii + PACKET_SIZE);
let slice = &packets[ii..end_range];
self.parse_packet(slice.iter());
// Call callback, there is probably a better way to do this
// while self.results.len() > 0 {
// let result = self.results.pop_front().unwrap();
//
// (self.callback)(result);
// }
}
// NOTE(review): the next line repeats the `parse_packet` signature with a
// `String` return type; it looks like the removed side of this diff hunk
// rather than live code.
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) -> String {
// Every packet has been handed to the parser at this point.
self.completed = true;
}
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) {
let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
let raw_text = String::from_utf8_lossy(&bytes);
let text = STRIP.replace_all(&raw_text, " ");
@ -82,33 +136,38 @@ pub mod duckduckgo {
match SINGLE_RESULT.captures(&self.previous_block.to_owned()) {
Some(captures) => {
let title = decode(captures.name("title").unwrap().as_str()).unwrap();
let title = decode(captures.name("title").unwrap().as_str())
.unwrap()
.into_owned();
let description_raw =
decode_html_text(captures.name("description").unwrap().as_str())
.unwrap();
let description = STRIP_HTML_TAGS.replace_all(&description_raw, "");
let url = decode(captures.name("url").unwrap().as_str()).unwrap();
let description = STRIP_HTML_TAGS
.replace_all(&description_raw, "")
.into_owned();
let url = decode(captures.name("url").unwrap().as_str())
.unwrap()
.into_owned();
let result = SearchResult {
title,
description,
url,
engine: SearchEngine::DuckDuckGo,
};
let end_position = captures.name("end").unwrap().end();
self.slice_remaining_block(&end_position);
// (self.callback)(result);
self.results.push_back(result);
}
None => {}
}
} else if RESULTS_START.is_match(&text) {
self.results_started = true;
}
"".to_owned()
}
}
impl DuckDuckGo {
pub fn new() -> Self {
Self {
search_results: Vec::new(),
results_started: false,
previous_block: String::new(),
}
}
}
}

View File

@ -1,13 +1,22 @@
pub mod engine_base {
use async_trait::async_trait;
/// Identifies the search engine a [`SearchResult`] came from.
///
/// `PartialEq`/`Eq` are derived alongside `Hash` so values can be compared and
/// used as keys in hashed collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum SearchEngine {
    /// The result was produced by the DuckDuckGo parser.
    DuckDuckGo,
}

/// One search hit as extracted by an engine parser.
///
/// `PartialEq`/`Eq` are derived alongside `Hash` so results can be compared
/// and de-duplicated with `HashSet`/`HashMap`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct SearchResult {
    /// Title of the hit.
    pub title: String,
    /// Target URL of the hit.
    pub url: String,
    /// Description text of the hit.
    pub description: String,
    /// Engine that produced this hit.
    pub engine: SearchEngine,
}
/// Interface implemented by the search-engine parsers in this crate
/// (`DuckDuckGo` implements it).
///
/// NOTE(review): this span shows both sides of a diff — `get_search_results`
/// and the `-> String` variant of `parse_packet` look like the removed
/// signatures, the other two like their replacements.
#[async_trait]
pub trait EngineBase {
// Returns the results collected so far.
fn get_search_results(&self) -> &Vec<SearchResult>;
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) -> String;
// Consumes one piece of the raw response, given as an iterator over its bytes.
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>);
// Runs a search for `query`.
fn search(&mut self, query: &str);
}
}

View File

@ -1,69 +1,122 @@
use std::cmp::min;
use client::client::{Client, PACKET_SIZE};
use engines::{duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase};
pub mod client;
pub mod engines;
pub mod utils;
/// Requests the DuckDuckGo HTML endpoint and feeds the response to the parser
/// in pieces of at most `PACKET_SIZE` bytes.
fn main() {
    let mut ddg = DuckDuckGo::new();
    let client = Client::new("https://html.duckduckgo.com/html/");

    // `unwrap` panics if `request` does not produce a value.
    let packets = client.request(&"POST").unwrap();

    // `chunks` yields the same windows as stepping an index by `PACKET_SIZE`
    // and clamping the end, including the shorter final window.
    for packet in packets.chunks(PACKET_SIZE) {
        ddg.parse_packet(packet.iter());
    }
}
// use std::cmp::min;
//
// use rocket::response::stream::TextStream;
// use rocket::tokio::time::{self, Duration};
//
// use client::client::{Client, PACKET_SIZE};
// use engines::{duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase};
// use engines::{
// duckduckgo::duckduckgo::DuckDuckGo,
// engine_base::engine_base::{EngineBase, SearchResult},
// };
//
// pub mod client;
// pub mod engines;
// pub mod utils;
//
// #[macro_use]
// extern crate rocket;
// #[tokio::main]
// async fn main() {
// let callback = Box::new(|result: SearchResult| {
// dbg!(&result);
// });
// let mut ddg = DuckDuckGo::new(callback);
// ddg.search(&"test").await;
//
// #[get("/")]
// fn index() -> &'static str {
// "Hello, world!"
// }
//
// #[get("/infinite-hellos")]
// fn hello() -> TextStream![String] {
// let mut ddg = DuckDuckGo::new();
// let client = Client::new("https://html.duckduckgo.com/html/");
//
// let packets = client.request(&"POST").unwrap();
//
// TextStream! {
// let mut interval = time::interval(Duration::from_secs(1));
// interval.tick().await;
//
// for ii in (0..packets.len()).step_by(PACKET_SIZE) {
// let end_range = min(packets.len(), ii + PACKET_SIZE);
//
// let slice = &packets[ii..end_range];
// yield ddg.parse_packet(slice.iter()).to_string();
// }
// }
// }
//
// #[launch]
// fn rocket() -> _ {
// rocket::build()
// .mount("/", routes![index])
// .mount("/", routes![hello])
// println!("done");
// }
// Found no other way to make this work
#![feature(async_closure)]
use std::{
cmp::max,
sync::{Arc, RwLock},
};
use rocket::response::stream::{ReaderStream, TextStream};
use engines::{
duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase,
engine_base::engine_base::SearchResult,
};
use tokio::sync::Mutex;
pub mod client;
pub mod engines;
pub mod utils;
#[macro_use]
extern crate rocket;
/// Holds a borrowed search query string.
///
/// NOTE(review): nothing in the visible source constructs or reads this type.
struct SearchParams<'r> {
// The query text, borrowed for `'r`.
query: &'r str,
}
/// Handles `GET /` and responds with a fixed greeting.
#[get("/")]
fn index() -> &'static str {
"Hello, world!"
}
#[get("/search")]
fn search_get() -> &'static str {
"<html>
<body>
<form method='get' action='/searchquery'>
<input name='query'>
<button type='submit'>Search</button>
</form>
</body>
</html>"
}
#[get("/searchquery?<query>")]
async fn hello<'a>(query: &str) -> TextStream![String] {
let query_box = Box::new(query.to_string());
TextStream! {
let start = "<html><body>".to_string();
yield start;
let ddg_tv = Arc::new(
Mutex::new(
DuckDuckGo::new(),
),
);
let ddg_tv_clone = ddg_tv.clone();
tokio::spawn(async move {
ddg_tv_clone.lock().await.search(&query_box);
});
let mut last_position: i32 = -1;
loop {
let ddg = ddg_tv.lock().await;
let len = ddg.results.len() as i32;
if ddg.completed && last_position == len {
break;
}
if last_position < (len - 1) {
for i in max(0, last_position)..=(len - 1) {
match ddg.results.get(i as usize).clone() {
Some(result) => {
let html = format!("<br><h2>{}</h2><p>{}</p>", result.title, result.description);
yield html;
}
None => {
break;
}
}
}
last_position = len;
}
}
let end = "</body></html>".to_string();
yield end;
}
}
/// Builds the Rocket instance with the `index`, `hello` and `search_get`
/// routes mounted at the root path.
#[launch]
fn rocket() -> _ {
    let root_routes = routes![index, hello, search_get];
    rocket::build().mount("/", root_routes)
}