你的位置:首页 > ASP.NET教程

[ASP.NET教程]用正则表达式从网页中提取视频地址


// Extracts the video (SWF player) address from a video web page with regular expressions:
// take a page URL, download the page HTML, then match the player address with a
// site-specific pattern. See the member comments for details.
//
// Namespaces used by this code: System, System.IO, System.IO.Compression, System.Net,
// System.Text, System.Text.RegularExpressions.

/// <summary>
/// Downloads a video page and extracts the embedded player address from its HTML.
/// </summary>
public class WebVideo
{
    // The patterns are built once and shared; Regex instances are immutable and
    // safe to reuse, so there is no reason to re-parse them on every call.

    /// <summary>Pattern for the Youku player address.</summary>
    private static readonly Regex YouKuPlayerRegex =
        new Regex(@"http://player\.youku\.com/player\.php/sid/[\w]{13}/v\.swf");

    /// <summary>Pattern for the Ku6 player address.</summary>
    private static readonly Regex Ku6PlayerRegex =
        new Regex(@"http://player\.ku6\.com/refer/[\w]{16}/v\.swf");

    /// <summary>Pattern for the Tudou player address.</summary>
    private static readonly Regex TuDouPlayerRegex =
        new Regex(@"http://js\.tudouui\.com/bin/player_online/[\w]+\.swf");

    /// <summary>
    /// Address of the video page (Youku, Ku6, Tudou, ...).
    /// </summary>
    private string _pageUrl;

    /// <summary>
    /// Whether the page is downloaded through <see cref="GetWebContent"/>,
    /// which requests and decodes gzip/deflate responses.
    /// </summary>
    private bool _isCompressed;

    /// <summary>
    /// Site the page belongs to; selects the pattern used by <see cref="GetVideoFileUrl"/>.
    /// </summary>
    private VideoSite _site;

    /// <summary>
    /// Creates an instance without a page URL. All fields keep their default values,
    /// so the site is <see cref="VideoSite.YouKu"/> (the enum's zero value).
    /// </summary>
    public WebVideo()
    {
    }

    /// <summary>
    /// Creates an instance for the given video page.
    /// </summary>
    /// <param name="pageUrl">Address of the video page; surrounding whitespace is trimmed.</param>
    /// <param name="isCompressed">Whether to request compression when downloading the page.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pageUrl"/> is null.</exception>
    public WebVideo(string pageUrl, bool isCompressed)
    {
        if (pageUrl == null)
        {
            throw new ArgumentNullException("pageUrl");
        }

        this._pageUrl = pageUrl.Trim();
        this._isCompressed = isCompressed;

        // NOTE(review): GetSite is not defined in the visible source; presumably it maps
        // the URL's host to a VideoSite value - confirm against the full class.
        this._site = this.GetSite(_pageUrl);
    }

    /// <summary>
    /// Downloads the HTML source of a page, asking the server for gzip/deflate
    /// compression and decoding the response accordingly.
    /// </summary>
    /// <param name="Url">Address of the page to download.</param>
    /// <returns>
    /// The page text, or an empty string when the download fails for any reason
    /// (the failure is reported on the console rather than thrown).
    /// </returns>
    public string GetWebContent(string Url)
    {
        string strResult = "";
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.Accept = "*/*";
            request.Headers.Set("Pragma", "no-cache");
            // Connection timeout, in milliseconds.
            request.Timeout = 9000;
            request.UserAgent = "TaoCaiSpider1.0 Kevin-Gu's spider";
            request.Headers.Add("Accept-Encoding", "gzip,deflate");

            // Disposing the response releases the connection even when reading fails.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Invariant lower-casing: a culture-sensitive ToLower() can turn "GZIP"
                // into something other than "gzip" (e.g. under the Turkish culture).
                string compressMode = (response.ContentEncoding ?? string.Empty).ToLowerInvariant();
                Console.WriteLine(compressMode);

                Stream responseStream = response.GetResponseStream();
                Stream decompressedStream;
                if (compressMode == "gzip")
                {
                    decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress);
                }
                else if (compressMode == "deflate")
                {
                    decompressedStream = new DeflateStream(responseStream, CompressionMode.Decompress);
                }
                else
                {
                    // Apparently only Youku enables page compression; read everything else as-is.
                    decompressedStream = responseStream;
                }

                Encoding encode = ResolveEncoding(response.CharacterSet);

                // Disposing the reader also disposes the (decompression) stream it wraps.
                using (StreamReader streamReader = new StreamReader(decompressedStream, encode))
                {
                    strResult = streamReader.ReadToEnd();
                }
            }
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: callers receive an empty string on any failure.
            Console.WriteLine("error occored:" + ex.Message);
        }

        return strResult;
    }

    /// <summary>
    /// Maps the character set reported by the server to an <see cref="Encoding"/>,
    /// falling back to UTF-8 when the name is missing or not recognised.
    /// </summary>
    /// <param name="charset">Character set name from the response; may be null or empty.</param>
    /// <returns>The matching encoding, or <see cref="Encoding.UTF8"/> as a fallback.</returns>
    private static Encoding ResolveEncoding(string charset)
    {
        if (string.IsNullOrEmpty(charset))
        {
            return Encoding.UTF8;
        }

        try
        {
            return Encoding.GetEncoding(charset);
        }
        catch (ArgumentException)
        {
            // An unknown charset should not discard the whole page: the player addresses
            // being searched for are plain ASCII, so UTF-8 decoding still finds them.
            return Encoding.UTF8;
        }
    }

    /// <summary>
    /// Finds the player address in a page's HTML using the pattern for the current site.
    /// </summary>
    /// <param name="htmlContent">HTML source of the video page; may be null or empty.</param>
    /// <returns>
    /// The first matching address, or an empty string when there is no match,
    /// no content, or the site has no known pattern.
    /// </returns>
    public string GetVideoFileUrl(string htmlContent)
    {
        if (string.IsNullOrEmpty(htmlContent))
        {
            return string.Empty;
        }

        // Each site embeds its player under a different address scheme.
        Regex rgx;
        switch (_site)
        {
            case VideoSite.YouKu:
                rgx = YouKuPlayerRegex;
                break;
            case VideoSite.Ku6:
                rgx = Ku6PlayerRegex;
                break;
            case VideoSite.TuDou:
                rgx = TuDouPlayerRegex;
                break;
            default:
                return string.Empty;
        }

        // A single Match call replaces the former IsMatch + Match pair, which
        // scanned the document twice.
        Match match = rgx.Match(htmlContent);
        return match.Success ? match.Value : string.Empty;
    }

    /// <summary>
    /// Downloads the configured video page and extracts the player address from it.
    /// </summary>
    /// <returns>The player address, or an empty string when none is found.</returns>
    public string GetVideoUrl()
    {
        string html;
        if (_isCompressed)
        {
            html = this.GetWebContent(_pageUrl);
        }
        else
        {
            // NOTE(review): GetHtmlWithoutCompress is not defined in the visible source;
            // presumably it downloads the page without requesting compression - confirm.
            html = this.GetHtmlWithoutCompress(_pageUrl);
        }

        return this.GetVideoFileUrl(html);
    }
}

/// <summary>
/// Video sites that <see cref="WebVideo"/> knows a player-address pattern for.
/// </summary>
public enum VideoSite
{
    YouKu = 0,
    Ku6 = 1,
    TuDou = 2,
}