Skip to content

Commit

Permalink
[StringUtils::indexOfAnyBut] further reduction of algorithm
Browse files Browse the repository at this point in the history
by simplifying set consideration:
find index i of first char in seq such that (seq.codePointAt(i) ∉ { x ∈ codepoints(searchChars) })
  • Loading branch information
IBue committed Dec 6, 2024
1 parent f200256 commit 0e372a6
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions src/main/java/org/apache/commons/lang3/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -2819,7 +2818,7 @@ public static int indexOfAny(final CharSequence cs, final String searchChars) {
/**
* Searches a CharSequence to find the first index of any
* character not in the given set of characters, i.e.,
* find index i of first char in cs such that (cs.codePointAt(i) ∈ {x ∈ codepoints(cs) ∣ x ∉ codepoints(searchChars) })
* find index i of first char in cs such that (cs.codePointAt(i) ∉ { x ∈ codepoints(searchChars) })
*
* <p>A {@code null} CharSequence will return {@code -1}.
* A {@code null} or zero length search array will return {@code -1}.</p>
Expand Down Expand Up @@ -2851,7 +2850,7 @@ public static int indexOfAnyBut(final CharSequence cs, final char... searchChars
/**
* Search a CharSequence to find the first index of any
* character not in the given set of characters, i.e.,
* find index i of first char in seq such that (seq.codePointAt(i) ∈ {x ∈ codepoints(seq) ∣ x ∉ codepoints(searchChars) })
* find index i of first char in seq such that (seq.codePointAt(i) ∉ { x ∈ codepoints(searchChars) })
*
* <p>A {@code null} CharSequence will return {@code -1}.
* A {@code null} or empty search string will return {@code -1}.</p>
Expand All @@ -2876,16 +2875,13 @@ public static int indexOfAnyBut(final CharSequence seq, final CharSequence searc
if (isEmpty(seq) || isEmpty(searchChars)) {
return INDEX_NOT_FOUND;
}
final Set<Integer> seqSetCodePoints = seq.codePoints().boxed().collect(Collectors.toSet()); // JDK >=10: Collectors::toUnmodifiableSet
final Set<Integer> searchSetCodePoints = searchChars.codePoints().boxed()
.collect(Collectors.toSet()); // JDK >=10: Collectors::toUnmodifiableSet
final Set<Integer> complSetCodePoints = seqSetCodePoints.stream().filter(((Predicate<Integer>) searchSetCodePoints::contains).negate()) // JDK >=11: Predicate.not(searchSetCodePoints::contains)
.collect(Collectors.toSet()); // JDK >=10: Collectors::toUnmodifiableSet
for (final ListIterator<Integer> seqListIt = seq.chars().boxed().collect(Collectors.toList()) // JDK >=16: Stream::toList, JDK >=10: Collectors::toUnmodifiableList
.listIterator(); seqListIt.hasNext(); seqListIt.next()) {
final int curSeqCharIdx = seqListIt.nextIndex();
final int curSeqCodePoint = Character.codePointAt(seq, curSeqCharIdx);
if (complSetCodePoints.contains(curSeqCodePoint)) {
if (!searchSetCodePoints.contains(curSeqCodePoint)) {
return curSeqCharIdx;
}
if (Character.isSupplementaryCodePoint(curSeqCodePoint)) {
Expand Down

0 comments on commit 0e372a6

Please sign in to comment.