From 68114dc34183e8b29c28103cfa5a922289489dcf Mon Sep 17 00:00:00 2001 From: sergey Date: Sat, 5 Mar 2022 09:10:08 -0500 Subject: [PATCH] Updated README to reflect recent changes --- README.md | 6 ++++-- py-gaoya/README.md | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9d7c6d4..d9f54b5 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ This project implements Locality Sensitive Hashing algorithms and data structure The primary use cases for Gaoya are deduplication and clustering. ## Main Features +* 64-, 32-, 16-, and 8-bit MinHash +* 64- and 128-bit SimHash * Fast implementation in Rust * Multi-threaded thanks to [rayon](https://github.com/rayon-rs/rayon) * Python bindings @@ -51,7 +53,7 @@ $ pip3 install gaoya ### Rust Example ```rust -use gaoya::minhash::{MinHashIndex, MinHasher32V1, MinHasher} ; +use gaoya::minhash::{MinHashIndex, MinHasher32, MinHasher} ; use gaoya::text::whitespace_split; use fxhash::FxHashSet; let corpus = [ @@ -61,7 +63,7 @@ let corpus = [ "Is this the first document?", "This not the first nor the second nor the third, but the fourth document"]; let (num_bands, band_width) = (42, 3); -let minhasher = MinHasher32V1::new(num_bands * band_width); +let minhasher = MinHasher32::new(num_bands * band_width); let mut index = MinHashIndex::new(num_bands, band_width, 0.5); for (i, doc) in corpus.iter().enumerate() { index.insert(i, minhasher.create_signature(whitespace_split(&doc.to_lowercase()))); diff --git a/py-gaoya/README.md b/py-gaoya/README.md index 242116b..90c1061 100644 --- a/py-gaoya/README.md +++ b/py-gaoya/README.md @@ -4,6 +4,8 @@ This project implements Locality Sensitive Hashing algorithms and data structures for indexing and querying text documents. The primary use cases for Gaoya are deduplication and clustering. +* 64-, 32-, 16-, and 8-bit MinHash +* 64- and 128-bit SimHash * MinHash | SimHash * Powered by Rust * Multi-threaded