你的位置:首页 > ASP.net教程

[ASP.net教程]支持Cookie并开放了一些特殊设置项的HttpWebClient


 1 using System; 2 using System.Collections.Generic; 3 using System.Linq; 4 using System.Text; 5 using System.Net; 6 using System.IO; 7 using System.Collections.Specialized; 8 using System.Web; 9  10 namespace Common.Helpers 11 { 12   /// <summary> 13   /// 网络访问辅助类 14   /// </summary> 15   public class HttpWebClient : WebClient 16   { 17     #region 公共属性 18     /// <summary> 19     /// 浏览器用户标识,默认采用Chrome的标识 20     /// </summary> 21     public string UserAgent { get; set; } 22     /// <summary> 23     /// Cookie容器 24     /// </summary> 25     public CookieContainer CookieContainer { get; set; } 26     /// <summary> 27     /// 如果 POST 请求需要 100-Continue 响应,则为 true;否则为 false。 28     /// </summary> 29     public bool Expect100Continue { get; set; } 30  31     private WebResponse m_LastWebResponse = null; 32     /// <summary> 33     /// 最后一次的响应对象 34     /// </summary> 35     public WebResponse LastWebResponse { get { return this.m_LastWebResponse; } } 36  37     private int m_Timeout = 120000; 38     /// <summary> 39     /// 超时时间,默认120000毫秒(120秒) 40     /// </summary> 41     public int Timeout 42     { 43       get { return m_Timeout; } 44       set { m_Timeout = value; } 45     } 46  47     private HttpWebClientSetting m_HttpWebClientSetting = null; 48     /// <summary> 49     /// WebClient设置项,该属性始终不会为null 50     /// </summary> 51     public HttpWebClientSetting HttpWebClientSetting 52     { 53       get 54       { 55         if (m_HttpWebClientSetting == null) 56         { 57           m_HttpWebClientSetting = new HttpWebClientSetting(); 58         } 59         return m_HttpWebClientSetting; 60       } 61       set 62       { 63         m_HttpWebClientSetting = value ?? 
new HttpWebClientSetting(); 64       } 65     } 66      67  68     /// <summary> 69     /// 预处理Web请求对象的委托方法(会在每次获取WebRequest对象后调用),默认值为null 70     /// </summary> 71     public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; } 72     #endregion 73  74     #region 构造方法 75     public HttpWebClient() 76       : this(new CookieContainer()) 77     { 78     } 79  80     public HttpWebClient(CookieContainer cookieContainer) 81     { 82       this.CookieContainer = cookieContainer; 83       this.UserAgent = UserAgentValues.FireFox; 84       this.Expect100Continue = false; 85     } 86     #endregion 87  88     #region 重写方法,增加对CookieContainer的支持 89     protected override WebRequest GetWebRequest(Uri address) 90     { 91       if (!string.IsNullOrEmpty(this.UserAgent)) 92       { 93         this.Headers.Add(HttpRequestHeader.UserAgent, this.UserAgent); 94       } 95  96       WebRequest request = base.GetWebRequest(address); 97       request.Timeout = this.Timeout; 98        99       if (request is HttpWebRequest)100       {101         HttpWebRequest httpRequest = request as HttpWebRequest;102         httpRequest.CookieContainer = this.CookieContainer;103         httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue; // 取消100-continue104 105         //读取自定义设置项106         if (this.HttpWebClientSetting != null)107         {108           httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;109         }110 111         //使用外部委托属性处理Request对象112         if (this.PrepareProcessWebRequest != null)113         {114           this.PrepareProcessWebRequest(httpRequest);115         }116       }117       118       return request;119     }120     #endregion121 122     #region 重写方法,增加对响应对象的访问123     protected override WebResponse GetWebResponse(WebRequest request)124     {125       WebResponse response = base.GetWebResponse(request);126       this.m_LastWebResponse = response;127       return response;128     }129     #endregion130 131     
#region (public) 向一个URL用POST提交数据,并返回其响应内容 PostData132     /// <summary>133     /// 向一个URL用POST提交数据,并返回其响应内容134     /// ZhangQingFeng  2014-12-14  Add135     ///  EditLog:136     ///    ZhangQingFeng  2015-05-12  Edit    因WebClient的UpdateValues方法中固定为UTF-8格式进行UrlEncode,因此此处需用UploadString方式来间接实现  --见微软WebClient类源码UploadValuesInternal方法中137     /// </summary>138     /// <param name="url">请求的URL</param>139     /// <param name="data">要提交的数据</param>140     /// <param name="encoding">请求所使用的编码</param>141     /// <param name="responseEncoding">响应内容所使用的编码,为null时使用请求的编码</param>142     /// <returns>响应的内容</returns>143     public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)144     {145       WebClient client = this;146 147       /*148       client.Encoding = encoding ?? Encoding.UTF8;149 150       byte[] response = client.UploadValues(url, "POST", data ?? new NameValueCollection());151 152       string html = string.Empty;153 154       if (responseEncoding == null)155       {156         html = client.Encoding.GetString(response);157       }158       else159       {160         html = responseEncoding.GetString(response);161       }162       */163 164       client.Encoding = encoding ?? Encoding.UTF8;165       client.Headers.Add(HttpRequestHeader.ContentType, "application/x-www-form-urlencoded");166 167       string delimiter = String.Empty;168       StringBuilder values = new StringBuilder();169       foreach (string name in data.AllKeys)170       {171         values.Append(delimiter);172         values.Append(HttpUtility.UrlEncode(name, encoding));173         values.Append("=");174         values.Append(HttpUtility.UrlEncode(data[name], encoding));175         delimiter = "&";176       }177 178       byte[] arrData = client.UploadData(url, "POST", Encoding.ASCII.GetBytes(values.ToString()));179       string html = (responseEncoding ?? 
client.Encoding).GetString(arrData);180 181       return html;182     }183 184     /// <summary>185     /// 向一个URL用POST提交数据,并返回其响应内容186     /// ZhangQingFeng  2014-12-14  Add187     /// </summary>188     /// <param name="url">请求的URL</param>189     /// <param name="data">要提交的数据</param>190     /// <param name="encoding">请求和响应所使用的编码</param>191     /// <returns>响应的内容</returns>192     public string PostData(string url, NameValueCollection data, Encoding encoding)193     {194       return PostData(url, data, encoding, null);195     }196 197     /// <summary>198     /// 向一个URL用POST提交数据,并返回其响应内容(使用this.Encoding来作请求编码和响应编码)199     /// ZhangQingFeng  2014-12-14  Add200     /// </summary>201     /// <param name="url">请求的URL</param>202     /// <param name="data">要提交的数据</param>203     /// <returns>响应的内容</returns>204     public string PostData(string url, NameValueCollection data)205     {206       return PostData(url, data, this.Encoding);207     }208     #endregion209 210     #region (public) 向一个URL用POST提交数据,并返回其响应内容 PostData211     /// <summary>212     /// 向一个URL用POST提交数据,并返回其响应内容213     /// ZhangQingFeng  2014-12-14  Add214     /// </summary>215     /// <param name="url">请求的URL</param>216     /// <param name="data">要提交的数据</param>217     /// <param name="encoding">请求和响应内容所使用的编码</param>218     /// <returns>响应的内容</returns>219     public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)220     {221       NameValueCollection postData = new NameValueCollection();222       if (data != null)223       {224         foreach (var item in data)225         {226           postData.Add(item.Key, item.Value);227         }228       }229       return PostData(url, postData, encoding, responseEncoding);230     }231 232 233     /// <summary>234     /// 向一个URL用POST提交数据,并返回其响应内容235     /// ZhangQingFeng  2014-12-14  Add236     /// </summary>237     /// <param name="url">请求的URL</param>238     /// <param name="data">要提交的数据</param>239     /// 
<param name="encoding">请求和响应所使用的编码</param>240     /// <returns>响应的内容</returns>241     public string PostData(string url, Dictionary<string, string> data, Encoding encoding)242     {243       return PostData(url, data, encoding, null);244     }245 246     /// <summary>247     /// 向一个URL用POST提交数据,并返回其响应内容(使用this.Encoding来作请求编码和响应编码)248     /// ZhangQingFeng  2014-12-14  Add249     /// </summary>250     /// <param name="url">请求的URL</param>251     /// <param name="data">要提交的数据</param>252     /// <returns>响应的内容</returns>253     public string PostData(string url, Dictionary<string, string> data)254     {255       return PostData(url, data, this.Encoding);256     }257     #endregion258 259     #region 辅助类260     /// <summary>261     /// 浏览器用户标识类262     /// </summary>263     public class UserAgentValues264     {265       public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";266       public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";267       public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";268     }269     #endregion270   }271 272   /// <summary>273   /// HttpWebClient对象设置类274   /// </summary>275   public class HttpWebClientSetting276   {277     private bool m_AllowAutoRedirect = true;278     /// <summary>279     /// 当响应内容为重定向时客户端是否自动重定向(如果该属性为true,则取到的响应则为重定向后的内容,否则则为响应原文),默认值为true280     /// </summary>281     public bool AllowAutoRedirect282     {283       get { return m_AllowAutoRedirect; }284       set { m_AllowAutoRedirect = value; }285     }286   }287 }

HttpWebClient

在做页面抓取的过程中,发现自带的WebClient不够灵活,因此做了一个实现。

 

关于在PostData方法中不使用UploadValues()方法的原因:

1.查看微软的源代码实现时发现,无论设置请求时的Encoding是否为GB2312,在使用WebClient的UploadValues()上传内容时,其内在都是使用UTF-8编码进行UrlEncode,因此传到服务端中的数据中若包含有中文时则一定会乱码,因此重写PostData以规避此问题。

 

关于HttpWebClientSetting中的AllowAutoRedirect属性:

在WebClient发起请求时,若响应为重定向,WebClient默认会自动跟随重定向,因此该类提供此设置项以控制访问时是否自动重定向。自动重定向时,对重定向目标地址发起的第二次请求中Referer头会被置空;若将AllowAutoRedirect设置为false,再手动从响应头(ResponseHeaders)中取出Location地址,设置好Referer头后再次访问,则可更真实地模拟浏览器访问,从而避开一些网站的防抓取设置。

 

关于HttpWebClient中的LastWebResponse属性:

当存在多次重定向时,LastWebResponse记录的是最后一次返回的响应对象,通过该对象的ResponseUri属性可以取到最终返回响应的页面真实地址,从而为下一次请求设置Referer头作准备。

 

大约就是如此,后期如有Bug会继续更新。