diff --git a/NEWS.md b/NEWS.md index c6a41f44e00c9..6e344b97f7bc5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -120,6 +120,10 @@ Standard library changes * Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`, `acotd`, `atand` now accept an square matrix ([#39758]). * A backslash before a newline in command literals now always removes the newline, similar to standard string literals, whereas the result was not well-defined before. ([#40753]) +* `replace(::String)` now allows multiple patterns to be specified, and they + will be applied left-to-right simultaneously, so only one pattern will be + applied to any character, and the patterns will only be applied to the input + text, not the replacements. ([#TBD]) #### Package Manager diff --git a/base/set.jl b/base/set.jl index 0c8a8b95b10ce..5a744c556432c 100644 --- a/base/set.jl +++ b/base/set.jl @@ -621,7 +621,6 @@ replace!(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace!, replace!(a::Callable, b::Pair, c::Pair; count::Integer=-1) = throw(MethodError(replace!, (a, b, c))) replace(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace, (a, b))) replace(a::Callable, b::Pair, c::Pair; count::Integer=-1) = throw(MethodError(replace, (a, b, c))) -replace(a::AbstractString, b::Pair, c::Pair) = throw(MethodError(replace, (a, b, c))) ### replace! 
for AbstractDict/AbstractSet
 
diff --git a/base/strings/util.jl b/base/strings/util.jl
index 9bc043513deed..0ca9db61e96f6 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -522,56 +522,79 @@ _replace(io, repl::Function, str, r, pattern) =
 _replace(io, repl::Function, str, r, pattern::Function) =
     print(io, repl(str[first(r)]))
 
-replace(str::String, pat_repl::Pair{<:AbstractChar}; count::Integer=typemax(Int)) =
-    replace(str, isequal(first(pat_repl)) => last(pat_repl); count=count)
-
-replace(str::String, pat_repl::Pair{<:Union{Tuple{Vararg{AbstractChar}},
-                                            AbstractVector{<:AbstractChar},Set{<:AbstractChar}}};
-        count::Integer=typemax(Int)) =
-    replace(str, in(first(pat_repl)) => last(pat_repl), count=count)
-
 _pat_replacer(x) = x
 _free_pat_replacer(x) = nothing
 
-function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int))
-    pattern, repl = pat_repl
+_pat_replacer(x::AbstractChar) = isequal(x)
+_pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x)
+
+function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N
     count == 0 && return str
     count < 0 && throw(DomainError(count, "`count` must be non-negative."))
     n = 1
-    e = lastindex(str)
+    e1 = nextind(str, lastindex(str)) # one past the last code unit: ncodeunits(str) + 1
     i = a = firstindex(str)
-    pattern = _pat_replacer(pattern)
-    r = something(findnext(pattern,str,i), 0)
-    j, k = first(r), last(r)
-    if j == 0
-        _free_pat_replacer(pattern)
+    patterns = map(p -> _pat_replacer(first(p)), pat_repl)
+    replaces = map(last, pat_repl)
+    # pending match of every pattern; `e1+1:0` marks "no further match" and
+    # starts past `e1`, so it is only selected once all patterns are exhausted
+    rs = map(patterns) do p
+        r = findnext(p, str, a)
+        if r === nothing || first(r) == 0
+            return e1+1:0
+        end
+        r isa Int && (r = r:r) # predicates yield an index; keep `rs` all ranges
+        return r
+    end
+    if all(>(e1), map(first, rs))
+        foreach(_free_pat_replacer, patterns)
         return str
     end
     out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str)))
-    while j != 0
+    while true
+        p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ?
+        r = rs[p]
+        j, k = first(r), last(r)
+        j > e1 && break
         if i == a || i <= k
+            # copy out preserved portion
             GC.@preserve str unsafe_write(out, pointer(str, i), UInt(j-i))
-            _replace(out, repl, str, r, pattern)
+            # copy out replacement string
+            _replace(out, replaces[p], str, r, patterns[p])
         end
         if k < j
             i = j
-            j > e && break
+            j == e1 && break
             k = nextind(str, j)
         else
             i = k = nextind(str, k)
         end
-        r = something(findnext(pattern,str,k), 0)
-        r === 0:-1 || n == count && break
-        j, k = first(r), last(r)
+        n == count && break
+        # re-search only the patterns whose pending match starts before the new
+        # position `k`; `let` rebinds `k` so the closure does not capture a
+        # variable that is reassigned by the enclosing loop
+        let k = k
+            rs = map(patterns, rs) do p, r
+                if first(r) < k
+                    r = findnext(p, str, k)
+                    if r === nothing || first(r) == 0
+                        return e1+1:0
+                    end
+                    r isa Int && (r = r:r) # keep `rs` all ranges
+                end
+                return r
+            end
+        end
         n += 1
     end
-    _free_pat_replacer(pattern)
-    write(out, SubString(str,i))
-    String(take!(out))
+    foreach(_free_pat_replacer, patterns)
+    write(out, SubString(str, i))
+    return String(take!(out))
 end
 
+
 """
-    replace(s::AbstractString, pat=>r; [count::Integer])
+    replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer])
 
 Search for the given pattern `pat` in `s`, and replace each occurrence with `r`.
 If `count` is provided, replace at most `count` occurrences.
@@ -584,6 +607,15 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then
 references in `r` are replaced with the corresponding matched text.
 To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`).
 
+Multiple patterns can be specified, and they will be applied left-to-right
+simultaneously, so only one pattern will be applied to any character, and the
+patterns will only be applied to the input text, not the replacements.
+If several patterns match starting at the same position, the pattern listed
+first is used. `count` limits the total number of replacements made.
+
+!!! compat "Julia 1.7"
+    Support for multiple patterns requires version 1.7.
+
 # Examples
 ```jldoctest
 julia> replace("Python is a programming language.", "Python" => "Julia")
@@ -597,10 +629,13 @@ julia> replace("The quick foxes run quickly.", "quick" => "", count=1)
 
 julia> replace("The quick foxes run quickly.", r"fox(es)?" => s"bus\\1")
 "The quick buses run quickly."
+
+julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a")
+"bca"
 ```
 """
-replace(s::AbstractString, pat_f::Pair; count=typemax(Int)) =
-    replace(String(s), pat_f, count=count)
+replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) =
+    replace(String(s), pat_f..., count=count)
 
 # TODO: allow transform as the first argument to replace?
 
diff --git a/test/strings/util.jl b/test/strings/util.jl
index e8ea3b643fcda..9fbcb5756bf42 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -307,6 +307,168 @@ end
 
 end
 
+@testset "replace many" begin
+    # PR 35414 Francesco Alemanno
+    @test replace("foobarbaz", "oo"=>"zz", "ar"=>"zz", "z"=>"m") == "fzzbzzbam"
+    substmp=["z"=>"m", "oo"=>"zz", "ar"=>"zz"]
+    for perm in [[1, 2, 3], [2, 1, 3], [3, 2, 1], [2, 3, 1], [1, 3, 2], [3, 1, 2]]
+        @test replace("foobarbaz",substmp[perm]...) == "fzzbzzbam"
+        @test replace("foobarbaz",substmp[perm]...,count=2) == "fzzbzzbaz"
+        @test replace("foobarbaz",substmp[perm]...,count=1) == "fzzbarbaz"
+    end
+    @test replace("foobarbaz", "z"=>"m", r"a.*a"=>uppercase) == "foobARBAm"
+    @test replace("foobarbaz", 'o'=>'z', 'a'=>'q', 'z'=>'m') == "fzzbqrbqm"
+
+
+    # PR #25732 Klaus Crusius
+    @test replace("\u2202", '*' => '\0', ""=>"") == "\u2202"
+
+    @test replace("foobar", 'o' => '0', ""=>"") == "f00bar"
+    @test replace("foobar", 'o' => '0', count=2, ""=>"") == "f0obar"
+    @test replace("foobar", 'o' => "", ""=>"") == "fbar"
+    @test replace("foobar", 'o' => "", count=2, ""=>"") == "fobar"
+    @test replace("foobar", 'f' => 'F', ""=>"") == "Foobar"
+    @test replace("foobar", 'r' => 'R', ""=>"") == "foobaR"
+
+    @test replace("foofoofoo", "foo" => "bar", ""=>"") == "barbarbar"
+    @test replace("foobarfoo", "foo" => "baz", ""=>"") == "bazbarbaz"
+    @test replace("barfoofoo", "foo" => "baz", ""=>"") == "barbazbaz"
+
+    @test replace("", "" => "", ""=>"") == ""
+    @test replace("", "" => "x", ""=>"") == "x"
+    @test replace("", "x" => "y", ""=>"") == ""
+
+    @test replace("abcd", "" => "^", ""=>"") == "^a^b^c^d^"
+    @test replace("abcd", "b" => "^", ""=>"") == "a^cd"
+    @test replace("abcd", r"b?" => "^", ""=>"") == "^a^c^d^"
+    @test replace("abcd", r"b+" => "^", ""=>"") == "a^cd"
+    @test replace("abcd", r"b?c?" => "^", ""=>"") == "^a^d^"
+    @test replace("abcd", r"[bc]?" => "^", ""=>"") == "^a^^d^"
+
+    @test replace("foobarfoo", r"(fo|ba)" => "xx", ""=>"") == "xxoxxrxxo"
+    @test replace("foobarfoo", r"(foo|ba)" => "bar", ""=>"") == "barbarrbar"
+
+    @test replace("foobar", 'o' => 'ø', ""=>"") == "føøbar"
+    @test replace("foobar", 'o' => 'ø', count=2, ""=>"") == "føobar"
+    @test replace("føøbar", 'ø' => 'o', ""=>"") == "foobar"
+    @test replace("føøbar", 'ø' => 'o', count=2, ""=>"") == "foøbar"
+    @test replace("føøbar", 'ø' => 'ö', ""=>"") == "fööbar"
+    @test replace("føøbar", 'ø' => 'ö', count=2, ""=>"") == "föøbar"
+    @test replace("føøbar", 'ø' => "", ""=>"") == "fbar"
+    @test replace("føøbar", 'ø' => "", count=2, ""=>"") == "føbar"
+    @test replace("føøbar", 'f' => 'F', ""=>"") == "Føøbar"
+    @test replace("ḟøøbar", 'ḟ' => 'F', ""=>"") == "Føøbar"
+    @test replace("føøbar", 'f' => 'Ḟ', ""=>"") == "Ḟøøbar"
+    @test replace("ḟøøbar", 'ḟ' => 'Ḟ', ""=>"") == "Ḟøøbar"
+    @test replace("føøbar", 'r' => 'R', ""=>"") == "føøbaR"
+    @test replace("føøbaṙ", 'ṙ' => 'R', ""=>"") == "føøbaR"
+    @test replace("føøbar", 'r' => 'Ṙ', ""=>"") == "føøbaṘ"
+    @test replace("føøbaṙ", 'ṙ' => 'Ṙ', ""=>"") == "føøbaṘ"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", ""=>"") == "barbarbar"
+    @test replace("ḟøøbarḟøø", "ḟøø" => "baz", ""=>"") == "bazbarbaz"
+    @test replace("barḟøøḟøø", "ḟøø" => "baz", ""=>"") == "barbazbaz"
+
+    @test replace("foofoofoo", "foo" => "ƀäṙ", ""=>"") == "ƀäṙƀäṙƀäṙ"
+    @test replace("fooƀäṙfoo", "foo" => "baz", ""=>"") == "bazƀäṙbaz"
+    @test replace("ƀäṙfoofoo", "foo" => "baz", ""=>"") == "ƀäṙbazbaz"
+
+    @test replace("foofoofoo", "foo" => "bar", ""=>"") == "barbarbar"
+    @test replace("foobarfoo", "foo" => "ƀäż", ""=>"") == "ƀäżbarƀäż"
+    @test replace("barfoofoo", "foo" => "ƀäż", ""=>"") == "barƀäżƀäż"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", ""=>"") == "ƀäṙƀäṙƀäṙ"
+    @test replace("ḟøøƀäṙḟøø", "ḟøø" => "baz", ""=>"") == "bazƀäṙbaz"
+    @test replace("ƀäṙḟøøḟøø", "ḟøø" => "baz", ""=>"") == "ƀäṙbazbaz"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", ""=>"") == "barbarbar"
+    @test replace("ḟøøbarḟøø", "ḟøø" => "ƀäż", ""=>"") == "ƀäżbarƀäż"
+    @test replace("barḟøøḟøø", "ḟøø" => "ƀäż", ""=>"") == "barƀäżƀäż"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", ""=>"") == "ƀäṙƀäṙƀäṙ"
+    @test replace("ḟøøƀäṙḟøø", "ḟøø" => "ƀäż", ""=>"") == "ƀäżƀäṙƀäż"
+    @test replace("ƀäṙḟøøḟøø", "ḟøø" => "ƀäż", ""=>"") == "ƀäṙƀäżƀäż"
+
+    @test replace("", "" => "ẍ", ""=>"") == "ẍ"
+    @test replace("", "ẍ" => "ÿ", ""=>"") == ""
+
+    @test replace("äƀçđ", "" => "π", ""=>"") == "πäπƀπçπđπ"
+    @test replace("äƀçđ", "ƀ" => "π", ""=>"") == "äπçđ"
+    @test replace("äƀçđ", r"ƀ?" => "π", ""=>"") == "πäπçπđπ"
+    @test replace("äƀçđ", r"ƀ+" => "π", ""=>"") == "äπçđ"
+    @test replace("äƀçđ", r"ƀ?ç?" => "π", ""=>"") == "πäπđπ"
+    @test replace("äƀçđ", r"[ƀç]?" => "π", ""=>"") == "πäππđπ"
+
+    @test replace("foobarfoo", r"(fo|ba)" => "ẍẍ", ""=>"") == "ẍẍoẍẍrẍẍo"
+
+    @test replace("ḟøøbarḟøø", r"(ḟø|ba)" => "xx", ""=>"") == "xxøxxrxxø"
+    @test replace("ḟøøbarḟøø", r"(ḟøø|ba)" => "bar", ""=>"") == "barbarrbar"
+
+    @test replace("fooƀäṙfoo", r"(fo|ƀä)" => "xx", ""=>"") == "xxoxxṙxxo"
+    @test replace("fooƀäṙfoo", r"(foo|ƀä)" => "ƀäṙ", ""=>"") == "ƀäṙƀäṙṙƀäṙ"
+
+    @test replace("ḟøøƀäṙḟøø", r"(ḟø|ƀä)" => "xx", ""=>"") == "xxøxxṙxxø"
+    @test replace("ḟøøƀäṙḟøø", r"(ḟøø|ƀä)" => "ƀäṙ", ""=>"") == "ƀäṙƀäṙṙƀäṙ"
+
+    @test replace("foo", "oo" => uppercase, ""=>"") == "fOO"
+
+    # Issue 13332
+    @test replace("abc", 'b' => 2.1, ""=>"") == "a2.1c"
+
+    # test replace with a count for String and GenericString
+    # check that replace is a no-op if count==0
+    for s in ["aaa", Test.GenericString("aaa")]
+        @test_throws DomainError replace(s, 'a' => "", count = -1, ""=>"")
+        @test replace(s, 'a' => 'z', count=0, ""=>"") == s
+        @test replace(s, 'a' => 'z', count=1, ""=>"") == "zaa"
+        @test replace(s, 'a' => 'z', count=2, ""=>"") == "zza"
+        @test replace(s, 'a' => 'z', count=3, ""=>"") == "zzz"
+        @test replace(s, 'a' => 'z', count=4, ""=>"") == "zzz"
+        @test replace(s, 'a' => 'z', count=typemax(Int), ""=>"") == "zzz"
+        @test replace(s, 'a' => 'z', ""=>"") == "zzz"
+    end
+
+    for s in ["abc"]
+        @test replace(s) === s
+        @test replace(s, 'a' => 'z', ""=>"") === "zbc"
+        @test replace(s, 'a' => 'z', 'b' => 'y') == "zyc"
+        @test replace(s, 'a' => 'z', 'c' => 'x', "b" => 'y') == "zyx"
+        @test replace(s, '1' => 'z', ""=>"") == s
+        @test replace(s, 'b' => "BbB", ""=>"", count=2) == "aBbBc"
+    end
+
+    for s in ["quick quicker quickest"]
+        @test replace(s) === s
+        @test replace(s, "quick" => 'a', "quicker" => uppercase, "quickest" => 'z') == "a aer aest"
+        @test replace(s, "quick"=>"Duck", "quicker"=>"is", "quickest"=>"lame", count=2) == "Duck Ducker quickest"
+        @test replace(s, "" => '1', ""=>"") == "1q1u1i1c1k1 1q1u1i1c1k1e1r1 1q1u1i1c1k1e1s1t1"
+        @test replace(s, "qu" => "QU", "qu" => "never happens", "ick" => "") == "QU QUer QUest"
+        @test replace(s, " " => '_', "r " => "r-") == "quick_quicker-quickest"
+        @test replace(s, r"[aeiou]" => "ä", "ui" => "ki", "i" => "I") == "qääck qääckär qääckäst"
+        @test replace(s, r"[^ ]+" => "word", "quicker " => "X", count=big"99") == "word word word"
+
+        @test replace(s, r"(quick)(e)"=>s"\2-\1", "x"=>"X") == "quick e-quickr e-quickst"
+
+        @test replace(s, 'q'=>'Q', 'u'=>'U') == "QUick QUicker QUickest"
+        @test replace(s, 'q'=>'Q', r"u"=>'U') == "QUick QUicker QUickest"
+        @test replace(s, 'q'=>'Q', isequal('u')=>uppercase) == "QUick QUicker QUickest"
+        @test replace(s, 'q'=>'Q', islowercase=>'-') == "Q---- Q------ Q-------"
+        @test replace(s, ['q', 'u']=>'K') == "KKick KKicker KKickest"
+        @test replace(s, occursin("uq")=>'K') == "KKick KKicker KKickest"
+        @test replace(s, isequal('q')=>"B") == "Buick Buicker Buickest"
+
+        @test replace(s, "qui"=>"A", 'r'=>'R') == "Ack AckeR Ackest"
+        @test replace(s, 'r'=>'x', islowercase=>uppercase) == "QUICK QUICKEx QUICKEST"
+        @test replace(s, islowercase=>uppercase, 'r'=>'x') == "QUICK QUICKER QUICKEST"
+        @test replace(s, "q"=>"z", islowercase=>uppercase, 'r'=>'x') == "zUICK zUICKER zUICKEST"
+        @test replace(s, "qui"=>"A", 'r'=>'x', islowercase=>uppercase) == "ACK ACKEx ACKEST"
+        @test replace(s, "qui"=>"A", 'r'=>'x', islowercase=>uppercase) == "ACK ACKEx ACKEST"
+        @test replace(s, r"q"=>"z", islowercase=>uppercase, 'r'=>'x') == "zUICK zUICKER zUICKEST"
+        @test_throws ErrorException("type String has no field match_data") replace(s, "q"=>s"a\1b")
+        @test_throws ErrorException("PCRE error: unknown substring") replace(s, r"q"=>s"a\1b")
+    end
+end
+
 @testset "chomp/chop" begin
     @test chomp("foo\n") == "foo"
     @test chomp("fo∀\n") == "fo∀"