Skip to content

Commit

Permalink
Auto merge of #47208 - Manishearth:double-ended-searcher, r=pnkfelix
Browse files Browse the repository at this point in the history
Make double ended searchers use dependent fingers

(fixes #47175)

r? @BurntSushi @alexcrichton

needs uplift to beta
  • Loading branch information
bors committed Jan 8, 2018
2 parents 1b193de + 9066219 commit b5392f5
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
16 changes: 10 additions & 6 deletions src/libcore/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
#[inline]
fn next(&mut self) -> SearchStep {
let old_finger = self.finger;
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.haystack.len()) };
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
if let Some(ch) = iter.next() {
Expand All @@ -304,7 +304,8 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
fn next_match(&mut self) -> Option<(usize, usize)> {
loop {
// get the haystack after the last character found
let bytes = if let Some(slice) = self.haystack.as_bytes().get(self.finger..) {
let bytes = if let Some(slice) = self.haystack.as_bytes()
.get(self.finger..self.finger_back) {
slice
} else {
return None;
Expand Down Expand Up @@ -340,7 +341,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
}
} else {
// found nothing, exit
self.finger = self.haystack.len();
self.finger = self.finger_back;
return None;
}
}
Expand All @@ -353,7 +354,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
#[inline]
fn next_back(&mut self) -> SearchStep {
let old_finger = self.finger_back;
let slice = unsafe { self.haystack.slice_unchecked(0, old_finger) };
let slice = unsafe { self.haystack.slice_unchecked(self.finger, old_finger) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
if let Some(ch) = iter.next_back() {
Expand All @@ -374,14 +375,17 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
let haystack = self.haystack.as_bytes();
loop {
// get the haystack up to but not including the last character searched
let bytes = if let Some(slice) = haystack.get(..self.finger_back) {
let bytes = if let Some(slice) = haystack.get(self.finger..self.finger_back) {
slice
} else {
return None;
};
// the last byte of the utf8 encoded needle
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
if let Some(index) = memchr::memrchr(last_byte, bytes) {
// we searched a slice that was offset by self.finger,
// add self.finger to recoup the original index
let index = self.finger + index;
// memrchr will return the index of the byte we wish to
// find. In case of an ASCII character, this is indeed
// where we wish our new finger to be ("after" the found
Expand Down Expand Up @@ -412,7 +416,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
// found the last byte when searching in reverse.
self.finger_back = index;
} else {
self.finger_back = 0;
self.finger_back = self.finger;
// found nothing, exit
return None;
}
Expand Down
38 changes: 38 additions & 0 deletions src/libcore/tests/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,41 @@ fn test_reverse_search_shared_bytes() {
[InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
);
}

// Regression test for double-ended searching with a `char` needle.
// Each `search_asserts!` call below pairs a list of searcher methods with a
// list of expected results of the same length.
// NOTE(review): `search_asserts!` is defined outside this view; presumably it
// calls the listed methods in order on a single searcher and compares each
// result positionally with the expected list. Confirm against the macro.
#[test]
fn double_ended_regression_test() {
// https://github.com/rust-lang/rust/issues/47175
// Ensures that double ended searching comes to a convergence

// Alternate front and back searches. Once the matches at 0, 10 and 5 have
// been reported, the final backward search is expected to report `Done`
// instead of returning a match that was already reported.
search_asserts!("abcdeabcdeabcde", 'a', "alternating double ended search",
[next_match, next_match_back, next_match, next_match_back],
[InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
);
// One forward match followed by three backward matches. The third backward
// search is expected to be `Done`: the match at 0..1 was already reported
// from the front.
search_asserts!("abcdeabcdeabcde", 'a', "triple double ended search for a",
[next_match, next_match_back, next_match_back, next_match_back],
[InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
);
// Same call sequence with a needle that does not sit at the very start of
// the haystack.
search_asserts!("abcdeabcdeabcde", 'd', "triple double ended search for d",
[next_match, next_match_back, next_match_back, next_match_back],
[InRange(3, 4), InRange(13, 14), InRange(8, 9), Done]
);
// NOTE(review): `STRESS` is a constant defined outside this view; the byte
// ranges expected in the cases below depend on its contents.
search_asserts!(STRESS, 'Á', "Double ended search for two-byte Latin character",
[next_match, next_match_back, next_match, next_match_back],
[InRange(0, 2), InRange(32, 34), InRange(8, 10), Done]
);
// The remaining cases interleave single steps (`next` / `next_back`, each
// expected here to yield `Rejects`) with match searches (`next_match` /
// `next_match_back`) from both ends; every sequence is expected to end in
// `Done`.
search_asserts!(STRESS, '각', "Reverse double ended search for three-byte Hangul character",
[next_match_back, next_back, next_match, next, next_match_back, next_match],
[InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done]
);
search_asserts!(STRESS, 'ก', "Double ended search for three-byte Thai character",
[next_match, next_back, next, next_match_back, next_match],
[InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done]
);
search_asserts!(STRESS, '😁', "Double ended search for four-byte emoji",
[next_match_back, next, next_match, next_back, next_match],
[InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done]
);
search_asserts!(STRESS, 'ꁁ', "Double ended search for three-byte Yi character with repeated bytes",
[next_match, next, next_match_back, next_back, next_match],
[InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done]
);
}

0 comments on commit b5392f5

Please sign in to comment.