Страница: 1 |
|
Вопрос: Кодировка при чтении web страницы
|
Добавлено: 16.02.09 18:14
|
|
Автор вопроса: gekko | Web-сайт: kalamfur.ru
|
Для получения кода страницы использовал пример из библиотеки:
http://vbnet.ru/faq/showtopic.asp?id=380
Споткнулся при чтении cwer.ru, т.к. он в UTF-8 и в текстовое поле вбивается чушь полная. Подскажите, как обойти глюк??
Ответить
|
Номер ответа: 1 Автор ответа: Winand
Вопросов: 87 Ответов: 2795
|
Web-сайт: winandfx.narod.ru Профиль | | #1
|
Добавлено: 17.02.09 00:44
|
Воспользуйся чем нибудь из этого:
- Option Explicit
- Public Const ISUTF8_DEFAULT_READSIZE = 1024
-
- Public Function Utf8Encode(ByRef strUnicode As String, ByRef bytUtf8() As Byte) As Long
- Dim lngUnicodeLength As Long
- Dim lngUtf8Size As Long
- Dim lngCharCode As Long
- Dim i As Long
-
- lngUnicodeLength = Len(strUnicode)
- If Not CBool(lngUnicodeLength) Then Exit Function
- ReDim bytUtf8((lngUnicodeLength * 3) - 1)
-
- For i = 1 To lngUnicodeLength Step 1
- lngCharCode = CLng(AscW(Mid$(strUnicode, i, 1))) And &HFFFF&
- If lngCharCode <= &H7F& Then
- bytUtf8(lngUtf8Size) = CByte(lngCharCode)
- lngUtf8Size = lngUtf8Size + 1
- ElseIf (lngCharCode >= &H80&) And (lngCharCode <= &H7FF&) Then
-
- bytUtf8(lngUtf8Size) = CByte((lngCharCode \ &H40&) Or &HC0&)
-
- bytUtf8(lngUtf8Size + 1) = CByte((lngCharCode And &H3F&) Or &H80&)
- lngUtf8Size = lngUtf8Size + 2
- ElseIf (lngCharCode >= &H800&) And (lngCharCode <= &HFFFF&) Then
-
- bytUtf8(lngUtf8Size) = CByte((lngCharCode \ &H1000&) Or &HE0&)
-
- bytUtf8(lngUtf8Size + 1) = CByte(((lngCharCode And &HFC0&) \ &H40&) Or &H80&)
-
- bytUtf8(lngUtf8Size + 2) = CByte((lngCharCode And &H3F&) Or &H80&)
- lngUtf8Size = lngUtf8Size + 3
- End If
- Next i
-
- If lngUtf8Size Then
- ReDim Preserve bytUtf8(lngUtf8Size - 1)
- Utf8Encode = lngUtf8Size
- End If
- End Function
-
- Public Function Utf8Decode(ByRef bytUtf8() As Byte, Optional ByRef strDefaultChar As String) As String
- Dim lngUtf8Size As Long
- Dim lngDefaultCharLength As Long
- Dim strBuffer As String
- Dim lngWriteLength As Long
- Dim bytUcs2Char(1) As Byte
- Dim i As Long
-
- On Error GoTo ExitFunction
- lngUtf8Size = UBound(bytUtf8)
- On Error GoTo 0
- lngDefaultCharLength = Len(strDefaultChar)
- If lngDefaultCharLength Then
- strBuffer = String$((lngUtf8Size + 1) * lngDefaultCharLength, vbNullChar)
- Else
- strBuffer = String$(lngUtf8Size + 1, vbNullChar)
- End If
- lngWriteLength = 1
-
- Do While i <= lngUtf8Size
- If bytUtf8(i) <= &H7F Then
- Mid(strBuffer, lngWriteLength, 1) = ChrW$(bytUtf8(i))
- lngWriteLength = lngWriteLength + 1
- i = i + 1
- ElseIf (bytUtf8(i) >= &HC2 And bytUtf8(i) <= &HDF) Then
- If (i + 1) <= lngUtf8Size Then
- If (bytUtf8(i + 1) >= &H80 And bytUtf8(i + 1) <= &HBF) Then
- bytUcs2Char(0) = ((bytUtf8(i) And &H3) * &H40) Or (bytUtf8(i + 1) And &H3F)
- bytUcs2Char(1) = (bytUtf8(i) And &H1C) \ &H4
- Mid(strBuffer, lngWriteLength, 1) = bytUcs2Char
- lngWriteLength = lngWriteLength + 1
- i = i + 2
- Else
- If lngDefaultCharLength Then GoSub SetDefaultChar
- i = i + 1
- End If
- Else
- If lngDefaultCharLength Then GoSub SetDefaultChar
- i = i + 1
- End If
- ElseIf (bytUtf8(i) >= &HE0 And bytUtf8(i) <= &HEF) Then
- If (i + 2) <= lngUtf8Size Then
- If (bytUtf8(i + 1) >= &H80 And bytUtf8(i + 1) <= &HBF) And _
- (bytUtf8(i + 2) >= &H80 And bytUtf8(i + 2) <= &HBF) Then
- bytUcs2Char(0) = ((bytUtf8(i + 1) And &H3) * &H40) Or (bytUtf8(i + 2) And &H3F)
- bytUcs2Char(1) = ((bytUtf8(i) And &HF) * &H10) Or ((bytUtf8(i + 1) And &H3C) \ &H4)
- Mid(strBuffer, lngWriteLength, 1) = bytUcs2Char
- lngWriteLength = lngWriteLength + 1
- i = i + 3
- Else
- If lngDefaultCharLength Then GoSub SetDefaultChar
- i = i + 1
- End If
- Else
- If lngDefaultCharLength Then GoSub SetDefaultChar
- i = i + 1
- End If
- Else
- If lngDefaultCharLength Then GoSub SetDefaultChar
- i = i + 1
- End If
- Loop
-
- Utf8Decode = Left$(strBuffer, lngWriteLength - 1)
-
- Exit Function
- SetDefaultChar:
- Mid(strBuffer, lngWriteLength, lngDefaultCharLength) = strDefaultChar
- lngWriteLength = lngWriteLength + lngDefaultCharLength
- Return
- ExitFunction:
- End Function
-
- Public Function IsUtf8(ByRef bytArray() As Byte, Optional ByVal lngReadSize As Long = ISUTF8_DEFAULT_READSIZE) As Boolean
- Dim lngArraySize As Long
- Dim lngReadPosition As Long
- Dim lngUtf8ByteSize As Long
- Dim lngIsUtf8 As Long
- Dim i As Long
-
- If Not CBool(lngReadSize) Then GoTo ExitFunction
- On Error GoTo ExitFunction
- lngArraySize = UBound(bytArray) + 1
- On Error GoTo 0
- If lngReadSize > lngArraySize Then lngReadSize = lngArraySize
-
- Do While lngReadPosition < lngReadSize
- If bytArray(lngReadPosition) <= &H7F Then
- lngReadPosition = lngReadPosition + 1
- ElseIf bytArray(lngReadPosition) < &HC0 Then
- Exit Function
- ElseIf (bytArray(lngReadPosition) >= &HC0) And (bytArray(lngReadPosition) <= &HFD) Then
- If (bytArray(lngReadPosition) And &HFC) = &HFC Then
- lngUtf8ByteSize = 5
- ElseIf (bytArray(lngReadPosition) And &HF8) = &HF8 Then
- lngUtf8ByteSize = 4
- ElseIf (bytArray(lngReadPosition) And &HF0) = &HF0 Then
- lngUtf8ByteSize = 3
- ElseIf (bytArray(lngReadPosition) And &HE0) = &HE0 Then
- lngUtf8ByteSize = 2
- ElseIf (bytArray(lngReadPosition) And &HC0) = &HC0 Then
- lngUtf8ByteSize = 1
- End If
- If (lngReadPosition + lngUtf8ByteSize) >= lngReadSize Then Exit Do
- For i = (lngReadPosition + 1) To (lngReadPosition + lngUtf8ByteSize) Step 1
- If Not ((bytArray(i) >= &H80) And (bytArray(i) <= &HBF)) Then Exit Function
- Next i
- lngIsUtf8 = lngIsUtf8 + 1
- lngReadPosition = lngReadPosition + lngUtf8ByteSize + 1
- Else
- lngReadPosition = lngReadPosition + 1
- End If
- Loop
-
- If lngIsUtf8 > 1 Then IsUtf8 = True
- ExitFunction:
- End Function
- Option Explicit
- Private Const CP_UTF8 = 65001
- Private Const CP_ACP = 0
- Private Declare Function GetACP Lib "Kernel32" () As Long
- Private Declare Function WideCharToMultiByte Lib "Kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpDefaultChar As Long, lpUsedDefaultChar As Long) As Long
- Private Declare Function MultiByteToWideChar Lib "Kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
-
- Private Function WToA(ByVal st As String, Optional ByVal cpg As Long = -1, Optional lFlags As Long = 0) As String
- Dim stBuffer As String
- Dim cwch As Long
- Dim pwz As Long
- Dim pwzBuffer As Long
- Dim lpUsedDefaultChar As Long
-
- If cpg = -1 Then cpg = GetACP()
- pwz = StrPtr(st)
- cwch = WideCharToMultiByte(cpg, lFlags, pwz, -1, 0&, 0&, ByVal 0&, ByVal 0&)
- stBuffer = String$(cwch + 1, vbNullChar)
- pwzBuffer = StrPtr(stBuffer)
- cwch = WideCharToMultiByte(cpg, lFlags, pwz, -1, pwzBuffer, Len(stBuffer), ByVal 0&, ByVal 0&)
- WToA = Left$(stBuffer, cwch - 1)
- End Function
-
- Private Function AToW(ByVal st As String, Optional ByVal cpg As Long = -1, Optional lFlags As Long = 0) As String
- Dim stBuffer As String
- Dim cwch As Long
- Dim pwz As Long
- Dim pwzBuffer As Long
-
- If cpg = -1 Then cpg = GetACP()
- pwz = StrPtr(st)
- cwch = MultiByteToWideChar(cpg, lFlags, pwz, -1, 0&, 0&)
- stBuffer = String$(cwch + 1, vbNullChar)
- pwzBuffer = StrPtr(stBuffer)
- cwch = MultiByteToWideChar(cpg, lFlags, pwz, -1, pwzBuffer, Len(stBuffer))
- AToW = Left$(stBuffer, cwch - 1)
- End Function
-
- Public Function EncodeUTF8(ByVal cnvUni As String) As String
- If cnvUni = vbNullString Then Exit Function
- EncodeUTF8 = StrConv(WToA(cnvUni, CP_UTF8), vbUnicode)
- End Function
-
- Public Function DecodeUTF8(ByVal cnvUni As String) As String
- If cnvUni = vbNullString Then Exit Function
- DecodeUTF8 = AToW(WToA(cnvUni, CP_ACP), CP_UTF8)
- End Function
- Option Explicit
-
-
- Private Function WinToUTF8(ByRef inString As String, _
- ByVal lMaxSize As Long) As String
-
- If inString = vbNullString Then Exit Function
-
- Dim hMemLock1 As Long, hMemLock2 As Long
- Dim iStrSize As Long
-
- hMemLock1 = GlobalAlloc(GPTR, lMaxSize)
- hMemLock2 = GlobalAlloc(GPTR, lMaxSize)
-
- iStrSize = MultiByteToWideChar(0&, 0&, inString, -1, hMemLock1, lMaxSize)
- iStrSize = WideCharToMultiByte(65001, 0&, hMemLock1, iStrSize, hMemLock2, lMaxSize, 0&, 0&)
-
- If Len(iStrSize) Then
- WinToUTF8 = String$(iStrSize - vbNull, 0&)
- Call CopyMemory(ByVal WinToUTF8, ByVal hMemLock2, iStrSize - vbNull)
- End If
-
- Call GlobalFree(hMemLock1)
- Call GlobalFree(hMemLock2)
- End Function
-
- Private Function UTF8ToWin(ByRef inString As String, _
- ByVal lMaxSize As Long) As String
-
- If inString = vbNullString Then Exit Function
-
- Dim hMemLock1 As Long, hMemLock2 As Long
- Dim iStrSize As Long
-
- hMemLock1 = GlobalAlloc(GPTR, lMaxSize)
- hMemLock2 = GlobalAlloc(GPTR, lMaxSize)
-
- iStrSize = MultiByteToWideChar(CP_UTF8, 0&, inString, -1, hMemLock1, lMaxSize)
- iStrSize = WideCharToMultiByte(0&, 0&, hMemLock1, -1, hMemLock2, iStrSize, 0&, 0&)
-
- If Len(iStrSize) Then
- UTF8ToWin = String$(iStrSize - vbNull, 0&)
- Call CopyMemory(ByVal UTF8ToWin, ByVal hMemLock2, iStrSize - vbNull)
- End If
-
- Call GlobalFree(hMemLock1)
- Call GlobalFree(hMemLock2)
- End Function
В последнем нужно добавить нужные объявления апи
Ответить
|
Номер ответа: 2 Автор ответа: gekko
Вопросов: 39 Ответов: 127
|
Web-сайт: kalamfur.ru Профиль | | #2
|
Добавлено: 17.02.09 07:27
|
Спасибо, Winand, снова выручил. 2 вариант - то, что надо.
Ответить
|
Страница: 1 |
Поиск по форуму