// Extracts a video player address from a web page with regular expressions:
// take a page URL, download the page HTML, then match the player URL in it.
//
// Namespaces this code relies on: System, System.IO, System.Net, System.Text,
// System.Text.RegularExpressions.

/// <summary>
/// Resolves the Flash player (.swf) address embedded in a Youku, Ku6 or Tudou
/// video page.
/// </summary>
public class WebVideo
{
    /// <summary>Connection timeout, in milliseconds, applied to every page request.</summary>
    private const int RequestTimeoutMilliseconds = 9000;

    /// <summary>User-Agent header sent with every page request.</summary>
    private const string SpiderUserAgent = "TaoCaiSpider1.0 Kevin-Gu's spider";

    // The patterns are built once and shared; Regex instances are safe to reuse
    // across calls and threads.
    private static readonly Regex YouKuPlayerRegex =
        new Regex(@"http://player\.youku\.com/player\.php/sid/[\w]{13}/v\.swf", RegexOptions.Compiled);

    private static readonly Regex Ku6PlayerRegex =
        new Regex(@"http://player\.ku6\.com/refer/[\w]{16}/v\.swf", RegexOptions.Compiled);

    private static readonly Regex TuDouPlayerRegex =
        new Regex(@"http://js\.tudouui\.com/bin/player_online/[\w]+\.swf", RegexOptions.Compiled);

    /// <summary>Every known player pattern, in the order tried when the site is unknown.</summary>
    private static readonly Regex[] AllPlayerRegexes =
        new Regex[] { YouKuPlayerRegex, Ku6PlayerRegex, TuDouPlayerRegex };

    /// <summary>Address of the Youku / Ku6 / Tudou video page (trimmed).</summary>
    private readonly string _pageUrl;

    /// <summary>Whether the page is requested with gzip/deflate compression enabled.</summary>
    private readonly bool _isCompressed;

    /// <summary>
    /// Site detected from <see cref="_pageUrl"/>; <c>null</c> when no page URL was
    /// supplied or the URL belongs to none of the known sites.
    /// </summary>
    private readonly VideoSite? _site;

    /// <summary>
    /// Creates an instance that is not bound to a page. <see cref="GetVideoFileUrl"/>
    /// can still be used on HTML obtained elsewhere; it then tries every known
    /// site pattern.
    /// </summary>
    public WebVideo()
    {
    }

    /// <summary>
    /// Creates an instance bound to a video page.
    /// </summary>
    /// <param name="pageUrl">Address of the video page; surrounding whitespace is trimmed.</param>
    /// <param name="isCompressed">
    /// <c>true</c> to request the page with gzip/deflate compression,
    /// <c>false</c> to request it uncompressed.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="pageUrl"/> is <c>null</c>.</exception>
    public WebVideo(string pageUrl, bool isCompressed)
    {
        if (pageUrl == null)
        {
            throw new ArgumentNullException("pageUrl");
        }

        _pageUrl = pageUrl.Trim();
        _isCompressed = isCompressed;
        _site = GetSite(_pageUrl);
    }

    /// <summary>
    /// Downloads the HTML of a page, asking the server for gzip/deflate
    /// compression and decompressing the response transparently.
    /// </summary>
    /// <param name="Url">Address of the page to download.</param>
    /// <returns>The page HTML, or an empty string when the download fails.</returns>
    public string GetWebContent(string Url)
    {
        return DownloadHtml(Url, true);
    }

    /// <summary>
    /// Downloads the HTML of a page without requesting compression.
    /// </summary>
    /// <param name="Url">Address of the page to download.</param>
    /// <returns>The page HTML, or an empty string when the download fails.</returns>
    public string GetHtmlWithoutCompress(string Url)
    {
        return DownloadHtml(Url, false);
    }

    /// <summary>
    /// Finds the player address in a page's HTML using the pattern of the site
    /// this instance was created for. When the site is unknown, every known
    /// pattern is tried in turn (Youku, Ku6, Tudou).
    /// </summary>
    /// <param name="htmlContent">HTML source of the video page.</param>
    /// <returns>
    /// The first matching player address, or an empty string when
    /// <paramref name="htmlContent"/> is null/empty or nothing matches.
    /// </returns>
    public string GetVideoFileUrl(string htmlContent)
    {
        // A failed download yields "", so empty input is a normal case here.
        if (string.IsNullOrEmpty(htmlContent))
        {
            return string.Empty;
        }

        if (_site.HasValue)
        {
            return FirstMatchOrEmpty(GetPlayerRegex(_site.Value), htmlContent);
        }

        foreach (Regex candidate in AllPlayerRegexes)
        {
            string found = FirstMatchOrEmpty(candidate, htmlContent);
            if (found.Length > 0)
            {
                return found;
            }
        }

        return string.Empty;
    }

    /// <summary>
    /// Downloads the page this instance was created for and extracts the
    /// player address from it.
    /// </summary>
    /// <returns>The player address, or an empty string when it cannot be determined.</returns>
    public string GetVideoUrl()
    {
        string html = _isCompressed
            ? GetWebContent(_pageUrl)
            : GetHtmlWithoutCompress(_pageUrl);

        return GetVideoFileUrl(html);
    }

    /// <summary>
    /// Detects which supported site a page URL belongs to by looking for the
    /// site's domain in the URL host (or in the raw text when the URL cannot be
    /// parsed as an absolute URI).
    /// </summary>
    /// <param name="url">Page URL to classify.</param>
    /// <returns>The detected site, or <c>null</c> when the URL matches none of them.</returns>
    private static VideoSite? GetSite(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return null;
        }

        Uri parsed;
        bool isAbsolute = Uri.TryCreate(url, UriKind.Absolute, out parsed);

        // Prefer the host so that a domain name appearing only in the path or
        // query string does not decide the site.
        string haystack = (isAbsolute && parsed.Host.Length > 0) ? parsed.Host : url;

        if (haystack.IndexOf("youku.com", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return VideoSite.YouKu;
        }

        if (haystack.IndexOf("ku6.com", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return VideoSite.Ku6;
        }

        if (haystack.IndexOf("tudou.com", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return VideoSite.TuDou;
        }

        return null;
    }

    /// <summary>Maps a site to its player-address pattern; <c>null</c> for an undefined enum value.</summary>
    private static Regex GetPlayerRegex(VideoSite site)
    {
        switch (site)
        {
            case VideoSite.YouKu:
                return YouKuPlayerRegex;
            case VideoSite.Ku6:
                return Ku6PlayerRegex;
            case VideoSite.TuDou:
                return TuDouPlayerRegex;
            default:
                return null;
        }
    }

    /// <summary>Returns the first match of <paramref name="pattern"/> in <paramref name="input"/>, or an empty string.</summary>
    private static string FirstMatchOrEmpty(Regex pattern, string input)
    {
        if (pattern == null)
        {
            return string.Empty;
        }

        // A single Match call replaces the IsMatch + Match pair, which scanned twice.
        Match match = pattern.Match(input);
        return match.Success ? match.Value : string.Empty;
    }

    /// <summary>
    /// Shared download routine behind <see cref="GetWebContent"/> and
    /// <see cref="GetHtmlWithoutCompress"/>.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="acceptCompression">Whether to negotiate gzip/deflate with the server.</param>
    /// <returns>The page HTML, or an empty string on any failure.</returns>
    private static string DownloadHtml(string url, bool acceptCompression)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Accept = "*/*";
            request.Headers.Set("Pragma", "no-cache");
            request.Timeout = RequestTimeoutMilliseconds;
            request.UserAgent = SpiderUserAgent;

            if (acceptCompression)
            {
                // Sends the Accept-Encoding header and unwraps the response
                // stream, so no manual GZipStream/DeflateStream handling is needed.
                request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            }

            // Disposing the response releases the underlying connection.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, ResolveEncoding(response.CharacterSet)))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: callers treat "" as "page unavailable",
            // so every failure (bad URL, timeout, HTTP error) is reported and
            // turned into an empty result instead of propagating.
            Console.WriteLine("error occored:" + ex.Message);
            return string.Empty;
        }
    }

    /// <summary>
    /// Resolves the response charset to an <see cref="Encoding"/>, falling back
    /// to UTF-8 when the charset is missing or not supported on this platform.
    /// </summary>
    private static Encoding ResolveEncoding(string charset)
    {
        if (!string.IsNullOrEmpty(charset))
        {
            try
            {
                return Encoding.GetEncoding(charset.Trim());
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: use the fallback below.
                // There is no non-throwing lookup API to test for this up front.
            }
        }

        return Encoding.UTF8;
    }
} // end class

/// <summary>
/// Video sites supported by <see cref="WebVideo"/>.
/// </summary>
public enum VideoSite
{
    YouKu = 0,
    Ku6 = 1,
    TuDou = 2,
}
// 原标题:用正则表达式从网页里面提取视频地址
// 关键词:正则表达式