星空网 > 软件开发 > ASP.net

C# Html格式内容转Csv内容包括table(重点在rowspan和colspan合并),p,div元素

将Html格式内容转换为Csv内容,支持table(重点处理rowspan和colspan合并单元格)、p、div元素;不支持嵌套的table。

 /// <summary>
 /// Converts HTML markup to CSV-style text. Every &lt;table&gt; is emitted first
 /// (with rowspan/colspan merged cells expanded onto a rectangular grid), then the
 /// text of every &lt;p&gt;, then the text of every &lt;div&gt;. Nested tables are not supported.
 /// </summary>
 /// <param name="hrml">HTML markup to convert. Null or empty yields an empty string.</param>
 /// <returns>
 /// CSV text. Each table starts with a "#flag_table#," line followed by one line per
 /// row, every value followed by "," and the line closed by an extra ",". The p and
 /// div sections each start with a "#flag_text#," line followed by one line per element.
 /// </returns>
 private string HtmlToCsv(string hrml)
 {
     if (string.IsNullOrEmpty(hrml))
     {
         return string.Empty;
     }

     HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
     doc.LoadHtml(hrml);
     StringBuilder sbLines = new StringBuilder();

     HtmlAgilityPack.HtmlNodeCollection tList = doc.DocumentNode.SelectNodes("//table");
     if (tList != null)
     {
         foreach (HtmlAgilityPack.HtmlNode table in tList)
         {
             sbLines.AppendLine("#flag_table#,");

             // ".//tr" is relative to this table; an absolute "//tr" would select
             // every row in the whole document for each table.
             HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes(".//tr");
             if (rows != null)
             {
                 AppendCsvTableRows(sbLines, rows);

                 // Empty the table so the later //p and //div queries no longer see its content.
                 table.RemoveAll();
             }
         }
     }

     // Paragraphs are cleared after being emitted so that an enclosing <div>
     // does not output the same text a second time.
     AppendCsvTextLines(sbLines, doc.DocumentNode.SelectNodes("//p"), true);

     // NOTE(review): nested <div> elements are each emitted with their full inner
     // text, so text inside an inner div appears once per enclosing div. The
     // original left div.RemoveAll() commented out; confirm the intended behaviour.
     AppendCsvTextLines(sbLines, doc.DocumentNode.SelectNodes("//div"), false);

     return sbLines.ToString();
 }

 /// <summary>
 /// Lays the cells of <paramref name="rows"/> out on a rectangular grid, honouring
 /// rowspan/colspan, and appends one CSV line per row to <paramref name="sbLines"/>.
 /// </summary>
 /// <param name="sbLines">Output buffer.</param>
 /// <param name="rows">The &lt;tr&gt; nodes of a single table, in document order.</param>
 private static void AppendCsvTableRows(StringBuilder sbLines, IList<HtmlAgilityPack.HtmlNode> rows)
 {
     // Upper bound for colspan so a bogus attribute cannot allocate an enormous row
     // (1000 is also the limit the HTML standard puts on colspan).
     const int maxColSpan = 1000;

     int rowCount = rows.Count;
     List<List<string>> grid = new List<List<string>>(rowCount);
     for (int r = 0; r < rowCount; r++)
     {
         grid.Add(new List<string>());
     }

     int colCount = 0;
     for (int r = 0; r < rowCount; r++)
     {
         int col = 0;
         foreach (HtmlAgilityPack.HtmlNode cell in rows[r].ChildNodes)
         {
             if (!IsCsvTableCell(cell))
             {
                 continue;
             }

             int colspan = System.Math.Min(ReadCsvSpan(cell, "colspan"), maxColSpan);
             int rowspan = ReadCsvSpan(cell, "rowspan");
             string text = CleanCsvCellText(cell.InnerText);

             // Skip grid slots already claimed by a rowspan reaching down from a row above.
             while (col < grid[r].Count && grid[r][col] != null)
             {
                 col++;
             }

             // As before, the anchor of a merged region with no text is emitted as a single space.
             if ((colspan > 1 || rowspan > 1) && text.Length == 0)
             {
                 text = " ";
             }

             // Clamp to the real row count so an oversized rowspan cannot run off the grid.
             int rowEnd = System.Math.Min(r + rowspan, rowCount);
             int colEnd = col + colspan;
             for (int ri = r; ri < rowEnd; ri++)
             {
                 List<string> line = grid[ri];
                 while (line.Count < colEnd)
                 {
                     line.Add(null);
                 }

                 for (int ci = col; ci < colEnd; ci++)
                 {
                     // Only the top-left slot carries the text; the rest of the span stays empty.
                     line[ci] = (ri == r && ci == col) ? text : string.Empty;
                 }
             }

             col = colEnd;
             if (col > colCount)
             {
                 colCount = col;
             }
         }
     }

     // Emit every row padded to the widest row so all lines have the same column count.
     foreach (List<string> line in grid)
     {
         for (int c = 0; c < colCount; c++)
         {
             string value = c < line.Count ? line[c] : null;
             sbLines.Append(value ?? string.Empty).Append(',');
         }
         sbLines.AppendLine(",");
     }
 }

 /// <summary>
 /// Appends a "#flag_text#," header and one CSV line per node to <paramref name="sbLines"/>.
 /// Does nothing when <paramref name="nodes"/> is null (the XPath query matched nothing).
 /// </summary>
 /// <param name="sbLines">Output buffer.</param>
 /// <param name="nodes">Nodes whose inner text should be emitted; may be null.</param>
 /// <param name="clearAfterEmit">When true, each node is emptied after its text is written.</param>
 private void AppendCsvTextLines(StringBuilder sbLines, HtmlAgilityPack.HtmlNodeCollection nodes, bool clearAfterEmit)
 {
     if (nodes == null)
     {
         return;
     }

     sbLines.AppendLine("#flag_text#,");
     foreach (HtmlAgilityPack.HtmlNode node in nodes)
     {
         string text = GetTextByHtml(CleanCsvCellText(node.InnerText));
         if (!string.IsNullOrWhiteSpace(text))
         {
             sbLines.Append(text).Append(',');
         }
         sbLines.AppendLine(",");

         if (clearAfterEmit)
         {
             node.RemoveAll();
         }
     }
 }

 /// <summary>Returns true when <paramref name="node"/> is a &lt;td&gt; or &lt;th&gt; element.</summary>
 private static bool IsCsvTableCell(HtmlAgilityPack.HtmlNode node)
 {
     return string.Equals(node.OriginalName, "td", System.StringComparison.OrdinalIgnoreCase)
         || string.Equals(node.OriginalName, "th", System.StringComparison.OrdinalIgnoreCase);
 }

 /// <summary>
 /// Reads a colspan/rowspan attribute. A missing, malformed or non-positive
 /// value is treated as 1 instead of throwing.
 /// </summary>
 private static int ReadCsvSpan(HtmlAgilityPack.HtmlNode cell, string attributeName)
 {
     HtmlAgilityPack.HtmlAttribute attr = cell.Attributes[attributeName];
     int span;
     if (attr == null
         || !int.TryParse(attr.Value, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out span)
         || span < 1)
     {
         return 1;
     }
     return span;
 }

 /// <summary>
 /// Normalises element text for use as a single CSV value: drops "&amp;nbsp;"
 /// entities and line breaks, trims, and neutralises commas.
 /// </summary>
 private static string CleanCsvCellText(string raw)
 {
     // An ASCII comma inside a value would split it into extra CSV columns, so it is
     // swapped for the full-width comma. (The previous code replaced "," with itself,
     // which did nothing.)
     return raw.Replace("&nbsp;", "")
               .Replace(",", ",")
               .Replace("\r", "")
               .Replace("\n", "")
               .Trim();
 }

 

html: 

C# Html格式内容转Csv内容包括table(重点在rowspan和colspan合并),p,div元素

 

csv:

C# Html格式内容转Csv内容包括table(重点在rowspan和colspan合并),p,div元素

 

url:http://www.cnblogs.com/dreamman/p/5343924.html

 




原标题:C# Html格式内容转Csv内容包括table(重点在rowspan和colspan合并),p,div元素

关键词:C#

C#
*特别声明:以上内容来自于网络收集,著作权属原作者所有,如有侵权,请联系我们: admin#shaoqun.com (#换成@)。

众合国际:https://www.goluckyvip.com/tag/49432.html
众汇付:https://www.goluckyvip.com/tag/49433.html
众齐网络:https://www.goluckyvip.com/tag/49434.html
众人创见:https://www.goluckyvip.com/tag/49435.html
众睿供应链:https://www.goluckyvip.com/tag/49436.html
众盛 深圳 科技物流:https://www.goluckyvip.com/tag/49437.html
【再放信号】美国Etsy即将放开中国卖家入驻,官方邮件你收到了吗?:https://www.kjdsnews.com/a/1836640.html
安庆市周边免费景点 安庆免费游玩的地方有哪些?:https://www.vstour.cn/a/365186.html
相关文章
我的浏览记录
最新相关资讯
海外公司注册 | 跨境电商服务平台 | 深圳旅行社 | 东南亚物流