测试代码:
/// <summary>
/// Demo entry point: runs a balanced &lt;p&gt;…&lt;/p&gt; pattern over a small HTML
/// sample, prints every match through <c>GetListByHtml</c>, then waits for a
/// key press so the console window stays open.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    // Sample markup: two paragraph elements separated by loose text and an <Img> tag.
    const string sampleHtml = "<p>sdfasdfsa</p>sxcvxc<Img src=><p>23424</p>";

    // Pattern anatomy:
    //   (?is)                 inline options: ignore case, '.' also matches newlines
    //   <p[^>]*>              the opening tag of the outer element
    //   (?> ... )*            atomic group, repeated, choosing one of:
    //       <p[^>]*>(?<o>)        a nested opening tag  -> push onto group "o"
    //       </p>(?<-o>)           a nested closing tag  -> pop from group "o"
    //       (?:(?!</?p\b).)*      any run of text that does not start a p tag
    //   (?(o)(?!))            fail if group "o" still holds an unclosed opening tag
    //   </p>                  the closing tag that pairs with the outer opening tag
    const string balancedParagraphPattern = @"(?is)<p[^>]*>(?><p[^>]*>(?<o>)|</p>(?<-o>)|(?:(?!</?p\b).)*)*(?(o)(?!))</p>";

    GetListByHtml(sampleHtml, balancedParagraphPattern);

    // Block until a key is pressed so the output can be read.
    Console.ReadKey();
}
/// <summary>
/// Writes every match of <paramref name="pat"/> found in <paramref name="text"/>
/// to the console, one match per line, in the order the regex engine finds them.
/// </summary>
/// <param name="text">The input string to scan.</param>
/// <param name="pat">
/// The regular-expression pattern. It is always compiled with
/// <c>RegexOptions.IgnoreCase</c>, in addition to any inline options it carries.
/// </param>
public static void GetListByHtml(string text,string pat)
{
    var matcher = new System.Text.RegularExpressions.Regex(
        pat,
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    // Walk the match chain: start at the first match and advance with NextMatch
    // until the engine reports no further success.
    for (var current = matcher.Match(text); current.Success; current = current.NextMatch())
    {
        Console.WriteLine(current.Value);
    }
}
这是抽取href的例子: string regexs = "href=[\\\"\\\'](http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?[\\\"\\\']";