23 Feb 2004

用Asp.Net抓取web页面

Share

演示:
http://www.aspxboy.com/demo/286.aspx


代码高亮显示(Highlight in Source Code Viewer)


http://www.aspxboy.com/code/default.aspx/S7s2zPJbeI9PJA/OK
HpAVwVaUEhf02EgEOgeB3Yd5Z7fLL3ZNgdv7XIqertUOYm9kbQ62
LshbJBwvWPBBYa4EyVMhvMhyEDPH8chU5ornn8=.Aspx

<html>
<head>
 

<SCRIPT runat="server">
  void Page_Load(Object sender, EventArgs e) {

    WebRequest req = WebRequest.Create("http://www.im286.com/index.php");
 string urliii="src="; //图片使用绝对连接
 urliii+="http://www.im286.com/";
    try {
        WebResponse result = req.GetResponse();
        Stream ReceiveStream = result.GetResponseStream();

        Byte[] read = new Byte[512];
        int bytes = ReceiveStream.Read(read, 0, 512);

        lblHTML.Text = "";
        while (bytes > 0)
        {

          // 注意:
          // 下面假定响应使用 gb2312 作为编码方式。
          // 如果内容以 ANSI 代码页形式(例如,932)发送,则使用类似下面的语句:
          //Encoding encode = System.Text.Encoding.GetEncoding("shift-jis");
          Encoding encode = System.Text.Encoding.GetEncoding("gb2312");
          lblHTML.Text = lblHTML.Text + encode.GetString(read, 0, bytes);
   
          bytes = ReceiveStream.Read(read, 0, 512);
   
        }
    } catch(Exception) {
        lblHTML.Text = "检索页时出错";
    }
 lblHTML.Text = lblHTML.Text.Replace("src=",""+urliii+"");
 lblHTML.Text = lblHTML.Text.Replace("\"","");
  }
</SCRIPT>
</head>

<body>
  <form method="post" action="Webrequest.aspx" runat="server">
   
    <asp:Label runat=server ID="lblHTML" Rows="30" Cols="80" EnableViewState="false"  Wrap="True"></asp:Label>
  </form>

</body>
</html>

<%@ Page language="C#" Trace="True" %>
<%@ Import Namespace="System.Net" %>
<%@ Import Namespace="System.IO" %>

comments powered by Disqus