loading...

C# Multi String Replace Extension Method

zacharypatten profile image Zachary Patten ・5 min read

Hey guys. I wrote an extension method in C# that allows you to do multiple string replacements in one method call. This can be a better pattern than iterative replacements, where each later replacement also runs on the output of the earlier ones.

This code is included in the Towel NuGet package. Please check it out if you want to see more code like it. Thanks. :)

using System;
using System.Linq;
using Towel;

/// <summary>Console demo of the multi-rule <c>Replace</c> and <c>ReplaceCached</c> string extensions.</summary>
public class Program
{
    /// <summary>Runs the basic and cached replacement examples, then waits for the user to press enter.</summary>
    public static void Main()
    {
        // basic example
        // -------------

        string input1 = "a b c d e";
        string testresult = input1.Replace(("a", "aaa"), ("b", "bbb"), ("d", "ddd"));
        Console.WriteLine(testresult);

        // caching example
        // ---------------

        // If you need to perform the same replacements on multiple strings, you
        // can use ReplaceCached for a performance boost. However, once cached,
        // any updates to the rules instance will not propagate until the relative
        // cache is purged and rebuilt.

        // Enumerable.Reverse yields an IEnumerable<char>, so the characters are
        // materialized back into a string before use.
        string input2 = new string(input1.Reverse().ToArray());
        string input3 = "abcd";

        var rules = new (string From, string To)[]
        {
            (From: "a", To: "aaa"),
            (From: "b", To: "bbb"),
            (From: "d", To: "ddd"),
        };

        Console.WriteLine(input1.ReplaceCached(rules)); // <- not cached (slower)
        Console.WriteLine(input2.ReplaceCached(rules)); // <- cached (faster)
        Extensions.ClearReplaceCache();
        Console.WriteLine(input3.ReplaceCached(rules)); // <- not cached (slower)
        Extensions.RemoveFromReplaceCache(rules);

        // runtime exceptions
        // ------------------

        // Uncomment the following lines for the exception examples to avoid.

        // ArgumentNullException's
        //_ = Extensions.Replace(null, ("a", "b"));
        //_ = input1.Replace(null);
        //_ = input1.Replace((null, "a"));
        //_ = input1.Replace(("a", null));

        // ArgumentException's
        //_ = input1.Replace();
        //_ = input1.Replace(("a", "b"), ("a", "c"));
        //_ = input1.Replace((string.Empty, "hello world"));

        Console.WriteLine("Press [enter] to continue...");
        Console.ReadLine();
    }
}

#region Framework Code

namespace Towel
{
    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    /// <summary>Extension methods that apply several <see cref="string"/> replacement rules in a single pass.</summary>
    public static class Extensions
    {
        /// <summary>Returns a new <see cref="string"/> in which all occurrences of Unicode <see cref="string"/> patterns in this instance are replaced with a relative Unicode <see cref="string"/> replacements.</summary>
        /// <remarks>Uses Regex without a timeout. Replaced text is never re-scanned, so one rule's replacement is not matched by another rule.</remarks>
        /// <param name="this">The <see cref="string"/> to perform the replacements on.</param>
        /// <param name="rules">The patterns and relative replacements to apply to this <see cref="string"/>.</param>
        /// <returns>A new <see cref="string"/> in which all occurrences of Unicode <see cref="string"/> patterns in this instance are replaced with a relative Unicode <see cref="string"/> replacements.</returns>
        /// <exception cref="ArgumentNullException">Thrown if any of the parameters are null or contain null values.</exception>
        /// <exception cref="ArgumentException">Thrown if <paramref name="rules"/> is empty, <paramref name="rules"/> contains empty patterns, or <paramref name="rules"/> contains duplicate patterns.</exception>
        public static string Replace(this string @this, params (string Pattern, string Replacement)[] rules) => ReplaceBase(@this, rules, false);

        /// <summary>Returns a new <see cref="string"/> in which all occurrences of Unicode <see cref="string"/> patterns in this instance are replaced with a relative Unicode <see cref="string"/> replacements. Caches internal values relative to the instance of rules.</summary>
        /// <remarks>
        /// Uses Regex without a timeout. This method is not thread-safe.
        /// The cache is keyed by the <paramref name="rules"/> array instance; on a cache hit the array contents are
        /// neither re-validated nor re-read, so later changes to the array are ignored until the entry is removed.
        /// </remarks>
        /// <param name="this">The <see cref="string"/> to perform the replacements on.</param>
        /// <param name="rules">The patterns and relative replacements to apply to this <see cref="string"/>.</param>
        /// <returns>A new <see cref="string"/> in which all occurrences of Unicode <see cref="string"/> patterns in this instance are replaced with a relative Unicode <see cref="string"/> replacements.</returns>
        /// <exception cref="ArgumentNullException">Thrown if any of the parameters are null or contain null values.</exception>
        /// <exception cref="ArgumentException">Thrown if <paramref name="rules"/> is empty, <paramref name="rules"/> contains empty patterns, or <paramref name="rules"/> contains duplicate patterns.</exception>
        public static string ReplaceCached(this string @this, params (string Pattern, string Replacement)[] rules) => ReplaceBase(@this, rules, true);

        /// <summary>Cache for the <see cref="ReplaceCached"/> method, keyed by the rules array instance.</summary>
        private static readonly Dictionary<object, (Regex Regex, Dictionary<string, string> RuleSet)> ReplaceCachedCache =
            new Dictionary<object, (Regex Regex, Dictionary<string, string> RuleSet)>();

        /// <summary>Shared implementation behind <see cref="Replace"/> and <see cref="ReplaceCached"/>.</summary>
        /// <param name="this">The <see cref="string"/> to perform the replacements on.</param>
        /// <param name="rules">The patterns and relative replacements to apply.</param>
        /// <param name="cached">True to look up and store the built regex and rule set in the cache.</param>
        /// <returns>The input with every pattern occurrence replaced by its relative replacement.</returns>
        /// <exception cref="ArgumentNullException">Thrown if any of the parameters are null or contain null values.</exception>
        /// <exception cref="ArgumentException">Thrown if <paramref name="rules"/> is empty, contains empty patterns, or contains duplicate patterns.</exception>
        private static string ReplaceBase(string @this, (string Pattern, string Replacement)[] rules, bool cached)
        {
            if (@this is null) throw new ArgumentNullException(nameof(@this));
            if (rules is null) throw new ArgumentNullException(nameof(rules));
            if (rules.Length <= 0) throw new ArgumentException($"{nameof(rules)}.{nameof(rules.Length)} <= 0", nameof(rules));

            if (!cached || !ReplaceCachedCache.TryGetValue(rules, out (Regex Regex, Dictionary<string, string> RuleSet) regexAndRuleSet))
            {
                regexAndRuleSet = BuildRegexAndRuleSet(rules, cached);
                if (cached)
                {
                    ReplaceCachedCache.Add(rules, regexAndRuleSet);
                }
            }

            // Every match is exactly one of the escaped patterns, so the lookup always succeeds.
            Dictionary<string, string> ruleSet = regexAndRuleSet.RuleSet;
            return regexAndRuleSet.Regex.Replace(@this, match => ruleSet[match.Value]);
        }

        /// <summary>Validates the rules and builds the alternation regex plus the pattern-to-replacement lookup.</summary>
        /// <param name="rules">The non-null, non-empty rules to validate and compile.</param>
        /// <param name="compiled">True to build the regex with <see cref="RegexOptions.Compiled"/>.</param>
        /// <returns>The regex matching any of the patterns and the lookup used to resolve each match.</returns>
        /// <exception cref="ArgumentNullException">Thrown if a rule has a null pattern or a null replacement.</exception>
        /// <exception cref="ArgumentException">Thrown if a rule has an empty pattern or a pattern occurs more than once.</exception>
        private static (Regex Regex, Dictionary<string, string> RuleSet) BuildRegexAndRuleSet((string Pattern, string Replacement)[] rules, bool compiled)
        {
            var ruleSet = new Dictionary<string, string>(rules.Length);
            var escapedPatterns = new string[rules.Length];
            for (int i = 0; i < rules.Length; i++)
            {
                var rule = rules[i];
                if (rule.Pattern is null) throw new ArgumentNullException(nameof(rules), $"{nameof(rules)} contains null {nameof(rule.Pattern)}s");
                if (rule.Replacement is null) throw new ArgumentNullException(nameof(rules), $"{nameof(rules)} contains null {nameof(rule.Replacement)}s");
                // ArgumentException takes (message, paramName), the reverse of ArgumentNullException's (paramName, message).
                if (rule.Pattern == string.Empty) throw new ArgumentException($"{nameof(rules)} contains empty {nameof(rule.Pattern)}s", nameof(rules));
                if (!ruleSet.TryAdd(rule.Pattern, rule.Replacement)) throw new ArgumentException($"{nameof(rules)} contains duplicate {nameof(rule.Pattern)}s", nameof(rules));
                escapedPatterns[i] = Regex.Escape(rule.Pattern);
            }

            // The alternatives keep the order of the rules array; when several patterns could match at the
            // same position, the one listed first is the one the regex takes.
            string regexPattern = string.Join("|", escapedPatterns);
            RegexOptions regexOptions = compiled ? RegexOptions.Compiled : RegexOptions.None;

            // NOTE:
            // Regex has a matchTimeout parameter that prevents security threats of long-running
            // regex patterns. However, an overload with a TimeSpan parameter likely is not required as
            // this implementation is not allowing special syntax in the regex. It is only flat strings
            // that are OR-ed together.

            return (new Regex(regexPattern, regexOptions), ruleSet);
        }

        /// <summary>Clears the cache for the <see cref="ReplaceCached"/> method.</summary>
        public static void ClearReplaceCache() => ReplaceCachedCache.Clear();

        /// <summary>Removes a rule set from the <see cref="ReplaceCached"/> method cache if it exists.</summary>
        /// <param name="rules">The rule set to remove from the cache.</param>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="rules"/> is null.</exception>
        public static void RemoveFromReplaceCache((string Pattern, string Replacement)[] rules)
        {
            if (rules is null) throw new ArgumentNullException(nameof(rules));

            // Remove is a no-op returning false when the key is absent, so no prior ContainsKey check is needed.
            ReplaceCachedCache.Remove(rules);
        }
    }
}

#endregion

The ReplaceCached method is comparable in speed to StringBuilder.Replace; however, there is one key difference: this method does not chain the rules. If one rule's replacement produces text that matches another rule's pattern, the second rule is not applied to text that has already been replaced.

// All rules are applied in one pass: the "aaa" produced by the first rule is not re-matched by the third rule.
string result = "a b c d e".Replace(("a", "aaa"), ("b", "bbb"), ("aaa", "ERROR"));
// result = "aaa bbb c d e"

But if you do the comparable replace using StringBuilder.Replace...

// Sequential replacements: each call operates on the output of the previous one,
// so the "aaa" written by the first call is rewritten by the third.
StringBuilder stringBuilder = new StringBuilder("a b c d e");
stringBuilder.Replace("a", "aaa");
stringBuilder.Replace("b", "bbb");
stringBuilder.Replace("aaa", "ERROR");
result = stringBuilder.ToString();
// result = "ERROR bbb c d e"

Here are some benchmarks to get a rough idea, but there need to be more/better benchmarks to really compare them...

using System.Text;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Towel;

namespace MyBenchmarks
{
    /// <summary>Benchmarks the same five single-letter replacements done three ways: uncached, cached, and via <see cref="System.Text.StringBuilder"/>.</summary>
    public class ReplaceBenchmarks
    {
        /// <summary>The space-separated alphabet every benchmark runs its replacements on.</summary>
        private const string Input = "a b c d e f g h i j k l m n o p q r s t u v w x y z";

        /// <summary>Rule array reused across invocations of <see cref="TowelCached"/>.</summary>
        private readonly (string, string)[] _rules =
        {
            ("a", "aaa"),
            ("b", "bbb"),
            ("d", "ddd"),
            ("p", "ppp"),
            ("z", "zzz"),
        };

        /// <summary>Multi-rule replace with the rules passed inline on every call.</summary>
        [Benchmark]
        public string Towel() =>
            Input.Replace(("a", "aaa"), ("b", "bbb"), ("d", "ddd"), ("p", "ppp"), ("z", "zzz"));

        /// <summary>Multi-rule replace through <c>ReplaceCached</c>, passing the same rule array each time.</summary>
        [Benchmark]
        public string TowelCached() => Input.ReplaceCached(_rules);

        /// <summary>The same five replacements applied one after another on a <see cref="System.Text.StringBuilder"/>.</summary>
        [Benchmark]
        public string StringBuilder() =>
            new StringBuilder(Input)
                .Replace("a", "aaa")
                .Replace("b", "bbb")
                .Replace("d", "ddd")
                .Replace("p", "ppp")
                .Replace("z", "zzz")
                .ToString();
    }

    /// <summary>Entry point for the benchmark project.</summary>
    public class Program
    {
        /// <summary>Passes <see cref="ReplaceBenchmarks"/> to <c>BenchmarkRunner.Run</c>; the returned summary is not used.</summary>
        public static void Main() => BenchmarkRunner.Run<ReplaceBenchmarks>();
    }
}

Benchmark results...


BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18363
Intel Core i7-4790K CPU 4.00GHz (Haswell), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=5.0.100-preview.6.20318.15
  [Host]     : .NET Core 3.1.6 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.31603), X64 RyuJIT
  DefaultJob : .NET Core 3.1.6 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.31603), X64 RyuJIT


Method Mean Error StdDev
Towel 4.992 us 0.1060 us 0.0885 us
TowelCached 1.533 us 0.0047 us 0.0040 us
StringBuilder 1.459 us 0.0053 us 0.0049 us

Posted on by:

zacharypatten profile

Zachary Patten

@zacharypatten

Just another programmer...

Discussion

pic
Editor guide