// Sample input: pick the third dash-separated field (index 2), or null when
// the input has fewer than three fields.
string str = "AB100123485;AB10064279293-IP-1-KNPO;AB473898487-MM41";
string[] fields = str.Split('-');
string result = fields.ElementAtOrDefault(2);
With the Split() call, you will end up with 6 allocations (1 for the array of strings, and 5 for the split strings).
With IndexOf()
, you would need to make two calls to IndexOf() with no allocation overhead, and then make one allocation when you call Substring().
With Regex()
, you would need to allocate memory for all the infrastructure to run the state machine that the .NET Framework builds up, as well as pay the cost of generating the state machine in the first place. Then, when Match() is called, it will have to allocate the Match object. Internally, that would allocate a GroupCollection that contains an allocation for the string that matches.
IndexOf()
would be the most efficient, followed by Split(), and then with Regex trailing.
From a readability standpoint, however, Regex looks to be the winner in the code below -- assuming that you and your succeeding maintainers understand regular expressions.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
static class StringExtensions
{
    /// <summary>
    /// Lazily splits <paramref name="input"/> on any of the characters in
    /// <paramref name="delims"/>, building each token only when the caller
    /// asks for it.
    /// </summary>
    /// <param name="input">
    /// The string to tokenize. Must not be null; enumerating the result of a
    /// call on a null string fails when the characters are iterated.
    /// </param>
    /// <param name="delims">Characters that terminate a token.</param>
    /// <returns>
    /// The tokens in order of appearance. Empty tokens are preserved wherever
    /// they occur (leading, between adjacent delimiters, and trailing), so an
    /// input with N delimiter characters always yields N + 1 tokens.
    /// </returns>
    public static IEnumerable<string> Tokens(this string input, params char[] delims)
    {
        var current = new StringBuilder();
        foreach (char ch in input)
        {
            if (delims.Contains(ch))
            {
                // A delimiter closes the token collected so far (possibly empty).
                yield return current.ToString();
                current.Clear();
            }
            else
            {
                current.Append(ch);
            }
        }

        // Always emit the final token, even when it is empty. Dropping it would
        // make a trailing delimiter behave differently from a leading or
        // interior one, which do produce empty tokens.
        yield return current.ToString();
    }
}
/// <summary>
/// Demonstrates four ways of extracting the value that follows an "IP-"
/// marker in a dash-delimited string, and prints the result of each one
/// for a set of sample inputs.
/// </summary>
class Program
{
    // Built once and reused; the named group captures the digits after "IP-".
    private static readonly Regex IpValuePattern = new Regex("IP-(?<IPValue>[0-9]+)");

    /// <summary>
    /// Extracts the text between the first "IP-" and the next '-' (or the end
    /// of the string) using only index searches and one substring.
    /// </summary>
    /// <param name="input">The string to search; may be null.</param>
    /// <returns>
    /// The extracted text, or null when <paramref name="input"/> is null, does
    /// not contain "IP-", or has nothing between the marker and the next '-'.
    /// </returns>
    private string GetIpUsingIndexOf(string input)
    {
        if (input == null)
        {
            return null;
        }

        const string prefix = "IP-";

        // Ordinal search: the marker is a fixed ASCII literal, and the index
        // arithmetic below assumes the match is exactly prefix.Length chars.
        int start = input.IndexOf(prefix, StringComparison.Ordinal);
        if (start < 0)
        {
            return null;
        }

        start += prefix.Length;
        int end = input.IndexOf('-', start);
        if (end < 0)
        {
            // No further dash: the value runs to the end of the string.
            end = input.Length;
        }

        return end > start ? input.Substring(start, end - start) : null;
    }

    /// <summary>
    /// Walks the dash-delimited tokens produced by <c>Tokens('-')</c> and
    /// returns the one immediately after the first token equal to "IP".
    /// </summary>
    /// <param name="input">The string to search; may be null.</param>
    /// <returns>
    /// The token following "IP", or null when <paramref name="input"/> is null,
    /// no token equals "IP", or no token follows it.
    /// </returns>
    private string GetIpUsingTokens(string input)
    {
        return input?.Tokens('-')
                     .SkipWhile(s => s != "IP")
                     .Skip(1)
                     .FirstOrDefault();
    }

    /// <summary>
    /// Splits the input on '-' and returns the element immediately after the
    /// first element equal to "IP".
    /// </summary>
    /// <param name="input">The string to search; may be null.</param>
    /// <returns>
    /// The element following "IP", or null when <paramref name="input"/> is
    /// null, no element equals "IP", or no element follows it.
    /// </returns>
    private string GetIpUsingSplit(string input)
    {
        return input?.Split('-')
                     .SkipWhile(s => s != "IP")
                     .Skip(1)
                     .FirstOrDefault();
    }

    /// <summary>
    /// Extracts the run of digits that follows the first "IP-" using a
    /// regular expression.
    /// </summary>
    /// <param name="input">The string to search; may be null.</param>
    /// <returns>
    /// The captured digits, or null when <paramref name="input"/> is null or
    /// the pattern does not match.
    /// </returns>
    private string GetIpUsingRegex(string input)
    {
        if (input == null)
        {
            return null;
        }

        // Match() never returns null; a failed match must be detected through
        // Success, otherwise the group value would come back as "" instead of
        // the null that the other GetIpUsing* methods return.
        Match match = IpValuePattern.Match(input);
        return match.Success ? match.Groups["IPValue"].Value : null;
    }

    /// <summary>
    /// Runs every extraction method against each sample input and writes the
    /// results to the console, one line per method per input.
    /// </summary>
    private void Run()
    {
        var tests = new string[]
        {
            "sometext-IP-123-745",
            "sometext-IP-123",
            "IP-123",
            "sometext-IP-123-moretext",
            "IP-123-moretext",
            "sometext-IX-123-moretext",
            "",
            null
        };

        foreach (var input in tests)
        {
            Console.WriteLine(GetIpUsingIndexOf(input));
            Console.WriteLine(GetIpUsingTokens(input));
            Console.WriteLine(GetIpUsingSplit(input));
            Console.WriteLine(GetIpUsingRegex(input));
        }
    }

    /// <summary>Program entry point.</summary>
    static void Main()
    {
        new Program().Run();
    }
}
StringExtensions.Tokens() is an attempt to have strings be allocated on demand, as opposed to Split(), which will split the entire string all at once.
If the value returned by GetIpUsing*() must be a number, then int.TryParse() should be called to ensure that a valid integer has been collected.