mirror of
https://github.com/Myzel394/tifsep.git
synced 2025-06-18 15:35:26 +02:00
feat: Add first wip for http server
This commit is contained in:
parent
0a5a846b7b
commit
78a87f207c
27
Cargo.lock
generated
27
Cargo.lock
generated
@ -310,6 +310,7 @@ checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-executor",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
@ -332,12 +333,34 @@ version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.30"
|
||||
@ -359,6 +382,7 @@ dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-macro",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"memchr",
|
||||
@ -1416,6 +1440,8 @@ dependencies = [
|
||||
name = "tcp_test"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures",
|
||||
"lazy-regex",
|
||||
"lazy_static",
|
||||
"regex",
|
||||
@ -1505,6 +1531,7 @@ dependencies = [
|
||||
"libc",
|
||||
"mio",
|
||||
"num_cpus",
|
||||
"parking_lot",
|
||||
"pin-project-lite",
|
||||
"signal-hook-registry",
|
||||
"socket2",
|
||||
|
@ -6,12 +6,14 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
async-trait = "0.1.77"
|
||||
futures = "0.3.30"
|
||||
lazy-regex = "3.1.0"
|
||||
lazy_static = "1.4.0"
|
||||
regex = "1.10.3"
|
||||
reqwest = "0.11.23"
|
||||
rocket = "0.5.0"
|
||||
rustls = "0.22.2"
|
||||
tokio = "1.35.1"
|
||||
tokio = {version = "1.35.1", features = ["full"]}
|
||||
urlencoding = "2.1.3"
|
||||
webpki-roots = "0.26.0"
|
||||
|
@ -1,46 +1,23 @@
|
||||
// Search engine parser for DuckDuckGo
|
||||
pub mod duckduckgo {
|
||||
use lazy_regex::regex_replace_all;
|
||||
// Results start at:
|
||||
// <div id="links" class="results">
|
||||
// Example for a result:
|
||||
// <div class="result results_links results_links_deep web-result ">
|
||||
// <div class="links_main links_deep result__body">
|
||||
// <h2 class="result__title">
|
||||
// <a
|
||||
// rel="nofollow" class="result__a"
|
||||
// href="https://www.speedtest.net/">
|
||||
// Speedtest by Ookla - The Global Broadband Speed Test
|
||||
// </a>
|
||||
// </h2>
|
||||
// <div class="result__extras">
|
||||
// <div class="result__extras__url">
|
||||
// <span class="result__icon">
|
||||
// <a rel="nofollow" href="https://www.speedtest.net/">
|
||||
// <img class="result__icon__img" width="16" height="16" alt=""
|
||||
// src="//external-content.duckduckgo.com/ip3/www.speedtest.net.ico" name="i15" />
|
||||
// </a>
|
||||
// </span>
|
||||
// <a class="result__url" href="https://www.speedtest.net/">
|
||||
// www.speedtest.net
|
||||
// </a>
|
||||
// </div>
|
||||
// </div>
|
||||
// <a
|
||||
// class="result__snippet"
|
||||
// href="https://www.speedtest.net/">
|
||||
// Use Speedtest on all your devices with our free desktop and mobile apps.
|
||||
// </a>
|
||||
// <div class="clear"></div>
|
||||
// </div>
|
||||
// </div>
|
||||
use std::{
|
||||
cmp::min,
|
||||
collections::VecDeque,
|
||||
pin::Pin,
|
||||
str::Bytes,
|
||||
task::{Context, Poll},
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::Stream;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use urlencoding::decode;
|
||||
|
||||
use crate::{
|
||||
engines::engine_base::engine_base::{EngineBase, SearchResult},
|
||||
utils::utils::{decode_html_text, replace_html_unicode},
|
||||
client::client::{Client, PACKET_SIZE},
|
||||
engines::engine_base::engine_base::{EngineBase, SearchEngine, SearchResult},
|
||||
utils::utils::decode_html_text,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
@ -50,10 +27,15 @@ pub mod duckduckgo {
|
||||
static ref STRIP_HTML_TAGS: Regex = Regex::new(r#"<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>"#).unwrap();
|
||||
}
|
||||
|
||||
pub type CallbackType = Box<dyn FnMut(SearchResult) -> () + Send + Sync>;
|
||||
|
||||
pub struct DuckDuckGo {
|
||||
pub search_results: Vec<SearchResult>,
|
||||
callback: CallbackType,
|
||||
pub completed: bool,
|
||||
results_started: bool,
|
||||
previous_block: String,
|
||||
// Holds all results until consumed by iterator
|
||||
pub results: VecDeque<SearchResult>,
|
||||
}
|
||||
|
||||
impl DuckDuckGo {
|
||||
@ -65,14 +47,86 @@ pub mod duckduckgo {
|
||||
self.previous_block.clear();
|
||||
self.previous_block.push_str(&remaining_text);
|
||||
}
|
||||
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
callback: Box::new(|_: SearchResult| {}),
|
||||
results_started: false,
|
||||
previous_block: String::new(),
|
||||
results: VecDeque::new(),
|
||||
completed: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_callback(&mut self, callback: CallbackType) {
|
||||
self.callback = callback;
|
||||
}
|
||||
}
|
||||
|
||||
// impl Stream for DuckDuckGo {
|
||||
// type Item = String;
|
||||
//
|
||||
// fn poll_next(
|
||||
// self: Pin<&mut Self>,
|
||||
// cx: &mut Context<'_>,
|
||||
// ) -> std::task::Poll<Option<Self::Item>> {
|
||||
// if self.results.len() > 0 {
|
||||
// let result = &mut self.results.pop_front().unwrap();
|
||||
//
|
||||
// let html = format!("<br><h2>{}</h2><p>{}</p>", result.title, result.description);
|
||||
//
|
||||
// return Poll::Ready(Some(html));
|
||||
// }
|
||||
//
|
||||
// if self.completed {
|
||||
// return Poll::Ready(None);
|
||||
// }
|
||||
//
|
||||
// Poll::Pending
|
||||
// }
|
||||
// }
|
||||
|
||||
// impl Iterator for DuckDuckGo {
|
||||
// type Item = SearchResult;
|
||||
//
|
||||
// fn next(&mut self) -> Option<SearchResult> {
|
||||
// if self.results.len() > 0 {
|
||||
// let oldest = self.results.pop_front().unwrap();
|
||||
//
|
||||
// Some(oldest)
|
||||
// } else {
|
||||
// None
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
#[async_trait]
|
||||
impl EngineBase for DuckDuckGo {
|
||||
fn get_search_results(&self) -> &Vec<SearchResult> {
|
||||
&self.search_results
|
||||
fn search(&mut self, query: &str) {
|
||||
dbg!("searching duckduckgo");
|
||||
|
||||
let client = Client::new("https://html.duckduckgo.com/html/");
|
||||
|
||||
let packets = client.request(&"POST").unwrap();
|
||||
|
||||
for ii in (0..packets.len()).step_by(PACKET_SIZE) {
|
||||
let end_range = min(packets.len(), ii + PACKET_SIZE);
|
||||
|
||||
let slice = &packets[ii..end_range];
|
||||
self.parse_packet(slice.iter());
|
||||
|
||||
// Call callback, there is probably a better way to do this
|
||||
// while self.results.len() > 0 {
|
||||
// let result = self.results.pop_front().unwrap();
|
||||
//
|
||||
// (self.callback)(result);
|
||||
// }
|
||||
}
|
||||
|
||||
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) -> String {
|
||||
self.completed = true;
|
||||
}
|
||||
|
||||
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) {
|
||||
let bytes: Vec<u8> = packet.map(|bit| *bit).collect();
|
||||
let raw_text = String::from_utf8_lossy(&bytes);
|
||||
let text = STRIP.replace_all(&raw_text, " ");
|
||||
@ -82,33 +136,38 @@ pub mod duckduckgo {
|
||||
|
||||
match SINGLE_RESULT.captures(&self.previous_block.to_owned()) {
|
||||
Some(captures) => {
|
||||
let title = decode(captures.name("title").unwrap().as_str()).unwrap();
|
||||
let title = decode(captures.name("title").unwrap().as_str())
|
||||
.unwrap()
|
||||
.into_owned();
|
||||
let description_raw =
|
||||
decode_html_text(captures.name("description").unwrap().as_str())
|
||||
.unwrap();
|
||||
let description = STRIP_HTML_TAGS.replace_all(&description_raw, "");
|
||||
let url = decode(captures.name("url").unwrap().as_str()).unwrap();
|
||||
let description = STRIP_HTML_TAGS
|
||||
.replace_all(&description_raw, "")
|
||||
.into_owned();
|
||||
let url = decode(captures.name("url").unwrap().as_str())
|
||||
.unwrap()
|
||||
.into_owned();
|
||||
|
||||
let result = SearchResult {
|
||||
title,
|
||||
description,
|
||||
url,
|
||||
engine: SearchEngine::DuckDuckGo,
|
||||
};
|
||||
|
||||
let end_position = captures.name("end").unwrap().end();
|
||||
self.slice_remaining_block(&end_position);
|
||||
|
||||
// (self.callback)(result);
|
||||
|
||||
self.results.push_back(result);
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
} else if RESULTS_START.is_match(&text) {
|
||||
self.results_started = true;
|
||||
}
|
||||
|
||||
"".to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
impl DuckDuckGo {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
search_results: Vec::new(),
|
||||
results_started: false,
|
||||
previous_block: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,13 +1,22 @@
|
||||
pub mod engine_base {
|
||||
use async_trait::async_trait;
|
||||
|
||||
/// Identifies which search engine produced a [`SearchResult`].
///
/// `PartialEq`/`Eq` are derived alongside `Hash` so values can be compared
/// and used as keys in hashed collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum SearchEngine {
    /// Result originates from DuckDuckGo.
    DuckDuckGo,
}

/// A single search hit as produced by an engine parser.
///
/// `PartialEq`/`Eq` are derived alongside `Hash` so results can be
/// de-duplicated via `HashSet`/`HashMap` and compared in tests.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct SearchResult {
    /// Title of the hit.
    pub title: String,
    /// Target URL of the hit.
    pub url: String,
    /// Short description / snippet of the hit.
    pub description: String,
    /// Engine that returned this hit.
    pub engine: SearchEngine,
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait EngineBase {
|
||||
fn get_search_results(&self) -> &Vec<SearchResult>;
|
||||
|
||||
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>) -> String;
|
||||
fn parse_packet<'a>(&mut self, packet: impl Iterator<Item = &'a u8>);
|
||||
fn search(&mut self, query: &str);
|
||||
}
|
||||
}
|
||||
|
177
src/main.rs
177
src/main.rs
@ -1,69 +1,122 @@
|
||||
use std::cmp::min;
|
||||
|
||||
use client::client::{Client, PACKET_SIZE};
|
||||
use engines::{duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase};
|
||||
|
||||
pub mod client;
|
||||
pub mod engines;
|
||||
pub mod utils;
|
||||
|
||||
fn main() {
|
||||
let mut ddg = DuckDuckGo::new();
|
||||
let client = Client::new("https://html.duckduckgo.com/html/");
|
||||
|
||||
let packets = client.request(&"POST").unwrap();
|
||||
|
||||
for ii in (0..packets.len()).step_by(PACKET_SIZE) {
|
||||
let end_range = min(packets.len(), ii + PACKET_SIZE);
|
||||
|
||||
let slice = &packets[ii..end_range];
|
||||
&ddg.parse_packet(slice.iter());
|
||||
}
|
||||
}
|
||||
|
||||
// use std::cmp::min;
|
||||
//
|
||||
// use rocket::response::stream::TextStream;
|
||||
// use rocket::tokio::time::{self, Duration};
|
||||
//
|
||||
// use client::client::{Client, PACKET_SIZE};
|
||||
// use engines::{duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase};
|
||||
// use engines::{
|
||||
// duckduckgo::duckduckgo::DuckDuckGo,
|
||||
// engine_base::engine_base::{EngineBase, SearchResult},
|
||||
// };
|
||||
//
|
||||
// pub mod client;
|
||||
// pub mod engines;
|
||||
// pub mod utils;
|
||||
//
|
||||
// #[macro_use]
|
||||
// extern crate rocket;
|
||||
// #[tokio::main]
|
||||
// async fn main() {
|
||||
// let callback = Box::new(|result: SearchResult| {
|
||||
// dbg!(&result);
|
||||
// });
|
||||
// let mut ddg = DuckDuckGo::new(callback);
|
||||
// ddg.search(&"test").await;
|
||||
//
|
||||
// #[get("/")]
|
||||
// fn index() -> &'static str {
|
||||
// "Hello, world!"
|
||||
// }
|
||||
//
|
||||
// #[get("/infinite-hellos")]
|
||||
// fn hello() -> TextStream![String] {
|
||||
// let mut ddg = DuckDuckGo::new();
|
||||
// let client = Client::new("https://html.duckduckgo.com/html/");
|
||||
//
|
||||
// let packets = client.request(&"POST").unwrap();
|
||||
//
|
||||
// TextStream! {
|
||||
// let mut interval = time::interval(Duration::from_secs(1));
|
||||
// interval.tick().await;
|
||||
//
|
||||
// for ii in (0..packets.len()).step_by(PACKET_SIZE) {
|
||||
// let end_range = min(packets.len(), ii + PACKET_SIZE);
|
||||
//
|
||||
// let slice = &packets[ii..end_range];
|
||||
// yield ddg.parse_packet(slice.iter()).to_string();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// #[launch]
|
||||
// fn rocket() -> _ {
|
||||
// rocket::build()
|
||||
// .mount("/", routes![index])
|
||||
// .mount("/", routes![hello])
|
||||
// println!("done");
|
||||
// }
|
||||
|
||||
// Found no other way to make this work
|
||||
#![feature(async_closure)]
|
||||
|
||||
use std::{
|
||||
cmp::max,
|
||||
sync::{Arc, RwLock},
|
||||
};
|
||||
|
||||
use rocket::response::stream::{ReaderStream, TextStream};
|
||||
|
||||
use engines::{
|
||||
duckduckgo::duckduckgo::DuckDuckGo, engine_base::engine_base::EngineBase,
|
||||
engine_base::engine_base::SearchResult,
|
||||
};
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
pub mod client;
|
||||
pub mod engines;
|
||||
pub mod utils;
|
||||
|
||||
#[macro_use]
|
||||
extern crate rocket;
|
||||
|
||||
/// Borrowed search parameters.
///
/// NOTE(review): no route visible in this file constructs or reads this
/// type yet — presumably reserved for a future form-based handler.
struct SearchParams<'r> {
    /// Raw query text, borrowed for lifetime `'r`.
    query: &'r str,
}
|
||||
|
||||
#[get("/")]
|
||||
fn index() -> &'static str {
|
||||
"Hello, world!"
|
||||
}
|
||||
|
||||
#[get("/search")]
|
||||
fn search_get() -> &'static str {
|
||||
"<html>
|
||||
<body>
|
||||
<form method='get' action='/searchquery'>
|
||||
<input name='query'>
|
||||
<button type='submit'>Search</button>
|
||||
</form>
|
||||
</body>
|
||||
</html>"
|
||||
}
|
||||
|
||||
#[get("/searchquery?<query>")]
|
||||
async fn hello<'a>(query: &str) -> TextStream![String] {
|
||||
let query_box = Box::new(query.to_string());
|
||||
|
||||
TextStream! {
|
||||
let start = "<html><body>".to_string();
|
||||
yield start;
|
||||
|
||||
let ddg_tv = Arc::new(
|
||||
Mutex::new(
|
||||
DuckDuckGo::new(),
|
||||
),
|
||||
);
|
||||
let ddg_tv_clone = ddg_tv.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
ddg_tv_clone.lock().await.search(&query_box);
|
||||
});
|
||||
|
||||
let mut last_position: i32 = -1;
|
||||
|
||||
loop {
|
||||
let ddg = ddg_tv.lock().await;
|
||||
let len = ddg.results.len() as i32;
|
||||
|
||||
if ddg.completed && last_position == len {
|
||||
break;
|
||||
}
|
||||
|
||||
if last_position < (len - 1) {
|
||||
for i in max(0, last_position)..=(len - 1) {
|
||||
match ddg.results.get(i as usize).clone() {
|
||||
Some(result) => {
|
||||
let html = format!("<br><h2>{}</h2><p>{}</p>", result.title, result.description);
|
||||
yield html;
|
||||
}
|
||||
None => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_position = len;
|
||||
}
|
||||
}
|
||||
|
||||
let end = "</body></html>".to_string();
|
||||
yield end;
|
||||
}
|
||||
}
|
||||
|
||||
#[launch]
|
||||
fn rocket() -> _ {
|
||||
rocket::build()
|
||||
.mount("/", routes![index])
|
||||
.mount("/", routes![hello])
|
||||
.mount("/", routes![search_get])
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user