|
很有意思的一段代码,代码实现对HTML内容中所有图片信息的提取,大家有时间的可以研究一下,完善一下程序。
// Matches one complete <img ...> tag, case-insensitively. Singleline lets '.'
// cross a bare '\n', so tags whose attributes are split across lines still match.
private static readonly System.Text.RegularExpressions.Regex ImgTagRegex =
    new System.Text.RegularExpressions.Regex(
        @"\<img(.*?)\>",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.Singleline);

// Matches an image file name (no directory part) ending in a known extension.
// The dot is escaped so that it only matches a literal '.', and '-' is accepted
// so that names such as "my-pic.jpg" are not truncated at the hyphen.
private static readonly System.Text.RegularExpressions.Regex ImageFileRegex =
    new System.Text.RegularExpressions.Regex(
        @"[\w()\-]*\.(?:jpg|bmp|gif|png|jpeg)",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

/// <summary>
/// Extracts the image file name referenced by each <c>&lt;img&gt;</c> tag in an HTML fragment.
/// </summary>
/// <param name="HtmlContent">HTML text to scan; may be null or empty.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one per <c>&lt;img&gt;</c> tag that contains a
/// file name ending in .jpg, .bmp, .gif, .png or .jpeg, in document order. Only the
/// file name is returned (any directory prefix is dropped). The list is empty when
/// nothing matches or when the input is null or empty.
/// </returns>
protected ArrayList GetAList(string HtmlContent)
{
    ArrayList arr = new ArrayList();
    if (string.IsNullOrEmpty(HtmlContent))
    {
        return arr;
    }

    // Join lines so that a tag split by CR/LF pairs is seen as one run of text.
    string html = HtmlContent.Replace("\r\n", "").Trim();

    foreach (System.Text.RegularExpressions.Match tag in ImgTagRegex.Matches(html))
    {
        // Only the first file name inside the tag is reported.
        System.Text.RegularExpressions.Match file = ImageFileRegex.Match(tag.Value);
        if (file.Success)
        {
            // The pattern matches the bare name, so no leading quote or slash
            // has to be stripped afterwards.
            arr.Add(file.Value);
        }
    }

    return arr;
} |
|
【收藏】【打印】【进入论坛】 |
|
|
|
|
|
|
|