Пишу программу для автоматического сбора информации с веб-сайтов и столкнулся с такой проблемой: некоторые сайты не отвечают на GET-запросы из программы. Например, www.mail.ru прекрасно грузится, а www.ebay.com не загружается.
Не пойму, в чём проблема! Спецы по HTTP и TCP/IP, подскажите, в чём дело?
Код привожу ниже:
#Compile Exe
#Dim All
'------------------------------------------------------------------------------
' GetWebPage - fetch a document over plain HTTP using a GET or POST request.
'
' Parameters:
'   sURL      - address in the form host[/path]; a leading "http://" is
'               accepted and stripped. When no path is given, "/" is requested.
'   sHeader   - [out] status line and response headers (everything before the
'               first blank line of the response).
'   sWebPage  - [out] response body (everything after the first blank line).
'   sFormData - [optional] when supplied, a POST is sent with this string as
'               the request body; when omitted, a GET is sent.
'
' Returns 1 when a response was received. Returns 0 when sURL is empty, the
' connection or the send failed, or the server returned no data.
' sHeader and sWebPage are cleared on entry, so they never carry stale
' content from an earlier call when this one fails.
'
' Notes:
'   * A "Host:" header is always sent. Servers that host several sites on one
'     IP address (name-based virtual hosting) cannot pick the right site
'     without it and may answer with an error or nothing at all.
'   * Redirects (3xx) are not followed and HTTPS is not supported; inspect
'     sHeader to see what the server actually answered.
'------------------------------------------------------------------------------
Function GetWebPage (sURL As String, sHeader As String, sWebPage As String, Optional sFormData As String) As Long
    Local lTCP      As Long
    Local sAddr     As String
    Local sHost     As String
    Local sFile     As String
    Local lFormData As Long
    Local sChunk    As String
    Local sBuffer   As String
    Local sDCRLF    As String
    Local sRequest  As String

    sDCRLF = $CrLf & $CrLf

    ' Never leave results of a previous call in the output parameters.
    sHeader  = ""
    sWebPage = ""

    If Len(sURL) = 0 Then Exit Function

    ' An omitted optional parameter has a null address; only touch sFormData
    ' when the caller really passed it.
    If VarPtr(sFormData) Then lFormData = 1

    ' Work on a copy so the caller's string is not modified, and tolerate an
    ' explicit "http://" scheme prefix.
    sAddr = sURL
    If LCase$(Left$(sAddr, 7)) = "http://" Then sAddr = Mid$(sAddr, 8)

    sHost = Extract$(sAddr, "/")
    sFile = "/" & Remain$(sAddr, "/")
    If Len(sHost) = 0 Then Exit Function

    lTCP = FreeFile
    ErrClear
    Tcp Open "http" At sHost As lTCP TimeOut 15000
    If Err Then Exit Function

    ' Request line plus the headers common to GET and POST.
    If lFormData Then
        sRequest = "POST " & sFile & " HTTP/1.0" & $CrLf
    Else
        sRequest = "GET " & sFile & " HTTP/1.0" & $CrLf
    End If
    sRequest = sRequest & "Host: " & sHost & $CrLf
    sRequest = sRequest & "User-Agent: Mozilla/4.0 (compatible; PowerBASIC)" & $CrLf
    sRequest = sRequest & "Accept: */*" & $CrLf
    sRequest = sRequest & "Connection: close" & $CrLf

    If lFormData Then
        sRequest = sRequest & "Content-Type: application/x-www-form-urlencoded" & $CrLf
        ' Format$ yields the number without the leading space Str$ would add.
        sRequest = sRequest & "Content-Length: " & Format$(Len(sFormData)) & $CrLf
        ' The blank line ends the header section; the body follows verbatim.
        sRequest = sRequest & $CrLf & sFormData
    Else
        sRequest = sRequest & $CrLf
    End If

    ErrClear
    ' The trailing semicolon stops TCP PRINT from appending its own CR/LF,
    ' which would add bytes beyond the declared Content-Length.
    Tcp Print lTCP, sRequest;
    If Err Then
        Tcp Close lTCP
        Exit Function
    End If

    ' Read until the server closes the connection (empty chunk) or an error
    ' such as a timeout occurs.
    Do
        Tcp Recv lTCP, 4096, sChunk
        If Err Then Exit Do
        sBuffer = sBuffer & sChunk
    Loop While Len(sChunk)

    Tcp Close lTCP

    If Len(sBuffer) = 0 Then Exit Function

    sHeader  = Extract$(sBuffer, sDCRLF)
    sWebPage = Remain$(sBuffer, sDCRLF)
    Function = 1
End Function
'------------------------------------------------------------------------------
' PBMain - program entry point.
'
' Downloads the front pages of www.mail.ru and www.ebay.com with GetWebPage
' and stores each response body in mail.html / ebay.html in the current
' directory. A file is written only when GetWebPage reports success, so a
' failed download can no longer save the previous page's content (left over
' in sWebPage) under the wrong file name.
'------------------------------------------------------------------------------
Function PBMain () As Long
    Local sHeader  As String
    Local sWebPage As String
    Local hFile    As Long

    If GetWebPage("www.mail.ru", sHeader, sWebPage) Then
        ' Ask the runtime for a free file number instead of assuming #1.
        hFile = FreeFile
        Open "mail.html" For Output As #hFile
        Print #hFile, sWebPage
        Close #hFile
    End If

    If GetWebPage("www.ebay.com", sHeader, sWebPage) Then
        hFile = FreeFile
        Open "ebay.html" For Output As #hFile
        Print #hFile, sWebPage
        Close #hFile
    End If
End Function
Ответить
|