Skip to content

Commit

Permalink
client
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfran committed Jan 20, 2024
1 parent c27320f commit 4a428f3
Show file tree
Hide file tree
Showing 10 changed files with 284 additions and 111 deletions.
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,21 @@ Search engine written in Rust, based on an inverted index on disk.
**Index construction**
- [x] In-memory datasets index construction;
- [x] Proper vocabulary and paths on disk;
- [ ] Spelling correction index;
- [ ] Disk-based partial index construction and merging;
- [ ] Spelling correction index.

**Queries**
- [x] Tf-idf ranked retrieval;
- [x] Window computation.
- [ ] Boolean queries;
- [ ] Parallel scoring.
- [x] Window computation;
- [ ] File content retrieval.

**Evaluation**
- [ ] Query speed;
- [ ] Query quality;
- [ ] Disk overhead.

**Client**
- [x] CLI
- [ ] Web interface
- [x] CLI;
- [ ] Web interface.

## Crates in use
- [stemmer-rs](/~https://github.com/lise-henry/stemmer-rs)
Expand Down
6 changes: 5 additions & 1 deletion client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
actix-web = "4.4.1"
askama = "0.12.1"
axum = "0.7.4"
env_logger = "0.11.0"
log = "0.4.20"
search = { path = "../search" }
serde = "1.0.195"
tokio = { version = "1.35.1", features = ["macros", "rt-multi-thread"] }
169 changes: 115 additions & 54 deletions client/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,77 +1,138 @@
use actix_web::{post, web, App, HttpServer, Responder, Result};
use askama::Template;
use axum::{
extract::{Json, State},
http::StatusCode,
response::{Html, IntoResponse, Response},
routing::{get, post},
Router,
};
use log::info;
use search::query::QueryProcessor;
use serde::{Deserialize, Serialize};
use std::{env, sync::Mutex, time::Instant};
use std::{
env,
sync::{Arc, Mutex},
time::Instant,
};

/// State shared by all request handlers (wrapped in an `Arc` by `main`).
struct AppState {
    /// Search engine instance; handlers lock the mutex to run a query.
    query_processor: Mutex<QueryProcessor>,
    /// Path rendered on the homepage.
    /// NOTE(review): `main` fills this with the CLI base path, not the
    /// derived `<base>/index/index` path — confirm the name is intended.
    index_path: String,
}

/// Starts the web client.
///
/// Expects one CLI argument, `<base_path>`, from which the index path
/// (`<base_path>/index/index`) and the tokenizer path
/// (`<base_path>/tokenizer/bert-base-uncased`) are derived. Serves the
/// homepage on `/` and the query endpoint on `/query`, listening on
/// `0.0.0.0:3000`.
///
/// # Panics
///
/// Panics if the listening socket cannot be bound or the server fails.
#[tokio::main]
async fn main() {
    // Default to `info` but let an explicit RUST_LOG take precedence.
    // Configuring the logger directly avoids `env::set_var`, which would
    // overwrite the user's setting and mutate the process environment after
    // the async runtime has been started.
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

    let args: Vec<String> = env::args().collect();

    // A missing argument is a usage error: report on stderr, exit non-zero.
    let Some(base_path) = args.get(1) else {
        eprintln!("Usage: cargo run --bin client <base_path>");
        std::process::exit(1);
    };

    let index_path = format!("{}/index/index", base_path);
    let tokenizer_path = format!("{}/tokenizer/bert-base-uncased", base_path);

    let state = Arc::new(AppState {
        query_processor: Mutex::new(QueryProcessor::build_query_processor(
            &index_path,
            &tokenizer_path,
        )),
        // NOTE(review): stores the base path (not `index_path`) for display
        // on the homepage — kept as in the original; confirm this is intended.
        index_path: base_path.clone(),
    });

    let app = Router::new()
        .route("/", get(root))
        .route("/query", post(post_query))
        .with_state(state);

    let listener = tokio::net::TcpListener::bind("0.0.0.0:3000")
        .await
        .expect("failed to bind 0.0.0.0:3000");

    info!("Application started on http://0.0.0.0:3000");

    axum::serve(listener, app)
        .await
        .expect("HTTP server terminated with an error");
}

/// Wrapper that turns any Askama template into an HTTP response.
struct HtmlTemplate<T>(T);

impl<T: Template> IntoResponse for HtmlTemplate<T> {
    /// Renders the wrapped template as an HTML response, or answers with
    /// `500 Internal Server Error` and the render error text on failure.
    fn into_response(self) -> Response {
        let HtmlTemplate(template) = self;

        let rendered = match template.render() {
            Ok(markup) => markup,
            Err(err) => {
                let message = format!("Failed to render template. Error: {}", err);
                return (StatusCode::INTERNAL_SERVER_ERROR, message).into_response();
            }
        };

        Html(rendered).into_response()
    }
}

// homepage
/// Template context for the homepage, rendered from `index.html`.
#[derive(Template)]
#[template(path = "index.html")]
struct Root {
    /// Value substituted for `{{index_path}}` in the page heading.
    index_path: String,
}

/// Handler for `GET /`: logs the request and renders the homepage with the
/// path stored in the shared state.
async fn root(State(state): State<Arc<AppState>>) -> impl IntoResponse {
    info!("Root request");

    let index_path = state.index_path.clone();
    let page = Root { index_path };

    HtmlTemplate(page)
}

// query handler

#[derive(Deserialize, Debug)]
struct QueryRequest {
query: String,
limit: usize,
// limit: usize,
}

#[derive(Serialize)]
#[derive(Template)]
#[template(path = "query.html")]
struct QueryResponse {
num_results: u32,
time_ms: u128,
documents: Vec<QueryDocumentResponse>,
documents: Vec<Document>,
}

#[derive(Serialize)]
struct QueryDocumentResponse {
#[derive(Serialize, Deserialize)]
struct Document {
id: u32,
score: f32,
path: String,
}

#[post("/query")]
async fn query(
r: web::Json<QueryRequest>,
q: web::Data<Mutex<QueryProcessor>>,
) -> Result<impl Responder> {
println!("query: {:?}", r);
async fn post_query(
State(state): State<Arc<AppState>>,
Json(payload): Json<QueryRequest>,
) -> impl IntoResponse {
info!("Query request: {:?}", payload);

let mut local_q = q.lock().unwrap();
let mut q = state.query_processor.lock().unwrap();

let start_time = Instant::now();
let result = local_q.query(&r.query, r.limit);
let elapsed_time = start_time.elapsed();

let response = QueryResponse {
num_results: result.len() as u32,
time_ms: elapsed_time.as_millis(),
documents: result
.iter()
.map(|e| QueryDocumentResponse {
id: e.id,
score: e.score,
path: e.path.clone(),
})
.collect(),
};

Ok(web::Json(response))
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
let args: Vec<String> = env::args().collect();
if args.len() < 2 {
println!("Usage: cargo run --bin client <base_path>");
return Ok(());
}
let query_result = q.query(&payload.query, 10);
let time_ms = start_time.elapsed().as_millis();

let base_path = &args[1];
let index_path = format!("{}/index/index", base_path);
let tokenizer_path = format!("{}/tokenizer/bert-base-uncased", base_path);
let documents = query_result
.iter()
.map(|r| Document {
id: r.id,
score: r.score,
path: r.path.clone(),
})
.collect();

HttpServer::new(move || {
App::new()
.app_data(web::Data::new(Mutex::new(
QueryProcessor::build_query_processor(&index_path, &tokenizer_path),
)))
.service(query)
})
.bind(("127.0.0.1", 8080))?
.run()
.await
HtmlTemplate(QueryResponse { time_ms, documents })
}
90 changes: 90 additions & 0 deletions client/templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
<!DOCTYPE html>
<html lang="en">

<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>search-rs</title>

<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">

<script src="https://unpkg.com/htmx.org@1.9.9"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/js/all.min.js"></script>
<script src="https://unpkg.com/htmx.org/dist/ext/json-enc.js"></script>

<style>
body {
display: flex;
justify-content: center;
/* align-items: center; */
/* Center vertically */
/* height: 100vh; */
width: 60vw;
margin: auto;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background-color: #1a1a1a;
color: #fff;
}

.search-container {
text-align: center;
margin-top: 100px;
}

.search-title {
font-size: 60px;
padding-bottom: 10px;
}

.search-bar-container {
position: relative;
display: inline-block;
}

.search-bar {
padding: 15px;
border: none;
border-radius: 10px;
outline: none;
font-size: 18px;
width: 400px;
background-color: #333;
color: #fff;
padding-left: 40px;
}

.search-icon {
position: absolute;
left: 15px;
top: 50%;
transform: translateY(-50%);
color: #ccc;
}

.search-time {
margin: 10px;
}

.search-result {
margin: 10px;
}
</style>
</head>

<body>

<div class="search-container">
<h1 class="search-title">index on {{index_path}}</h1>
<div class="search-bar-container">
<i class="fas fa-search search-icon"></i>
<input type="text" class="search-bar" name="query" autofocus hx-post="/query" hx-ext='json-enc'
hx-target=".search-results" hx-trigger="keyup[keyCode==13]">
</div>
<div class="search-results" hx-target=".search-results">
<!-- Search results will appear here -->
</div>
</div>

</body>

</html>
4 changes: 4 additions & 0 deletions client/templates/query.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<div class="search-time">Query completed in {{ time_ms }} ms</div>
{% for doc in documents %}
<div class="search-result">{{ loop.index }} - {{ doc.path }}</div>
{% endfor %}
35 changes: 25 additions & 10 deletions search/src/index/documents.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::DOCUMENTS_EXTENSION;
use super::{utils, DOCUMENTS_EXTENSION};
use crate::disk::{bits_reader::BitsReader, bits_writer::BitsWriter};

#[derive(Clone)]
Expand All @@ -8,25 +8,40 @@ pub struct Document {
}

pub fn write_documents(documents: &Vec<Document>, output_path: &str) {
let doc_path = output_path.to_string() + DOCUMENTS_EXTENSION;
let mut doc_writer = BitsWriter::new(&doc_path);
let path = output_path.to_string() + DOCUMENTS_EXTENSION;
let mut writer = BitsWriter::new(&path);

doc_writer.write_vbyte(documents.len() as u32);
let mut prev = "";

writer.write_vbyte(documents.len() as u32);
documents.iter().for_each(|l| {
doc_writer.write_str(&l.path);
doc_writer.write_vbyte(l.lenght);
let p_len = utils::get_matching_prefix_len(prev, &l.path);
writer.write_gamma(p_len as u32);
let remaining: String = l.path.chars().skip(p_len).collect();
prev = &l.path;

writer.write_str(&remaining);
writer.write_vbyte(l.lenght);
});

doc_writer.flush();
writer.flush();
}

pub fn load_documents(input_path: &str) -> Vec<Document> {
let mut reader = BitsReader::new(&(input_path.to_string() + DOCUMENTS_EXTENSION));

let mut prev: String = "".to_string();
(0..reader.read_vbyte())
.map(|_| Document {
path: reader.read_str(),
lenght: reader.read_vbyte(),
.map(|_| {
let p_len = reader.read_gamma();
let prefix: String = prev.chars().take(p_len as usize).collect();
let s = prefix + &reader.read_str();
prev = s.clone();

Document {
path: s,
lenght: reader.read_vbyte(),
}
})
.collect()
}
1 change: 1 addition & 0 deletions search/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod builder;
mod documents;
mod postings;
mod text;
mod utils;
mod vocabulary;

use fxhash::FxHashMap;
Expand Down
Loading

0 comments on commit 4a428f3

Please sign in to comment.