Link to home
Start Free TrialLog in
Avatar of PECollingwood
PECollingwood

asked on

Can I use the Unicode variants of WININET API from VB6?

I have succesfully used the ?????A variants of the WININET API to scrape HTML page content from the web.
Here's the relevant DLL function declarations from my BAS file - the code to use these calls is widely available everywhere.

Private Declare Function InternetOpen Lib "wininet" Alias "InternetOpenA" ( _
   ByVal lpszAgent$, _
   ByVal dwAccessType&, _
   ByVal lpszProxyName$, _
   ByVal lpszProxyBypass$, _
   ByVal dwFlags&) As Long

Private Declare Function InternetConnect Lib "wininet" Alias "InternetConnectA" ( _
   ByVal internet_handle&, _
   ByVal lpszServerName$, _
   ByVal nServerPort&, _
   ByVal lpszUserName$, _
   ByVal lpszPassword$, _
   ByVal dwService&, _
   ByVal dwFlags&, _
   ByVal context_word&) As Long

Private Declare Function HttpOpenRequest Lib "wininet" Alias "HttpOpenRequestA" ( _
    ByVal hHttpSession&, _
    ByVal lpszVerb$, _
    ByVal lpszObjectName$, _
    ByVal lpszVersion$, _
    ByVal lpszReferer$, _
    ByVal lpszAcceptTypes$, _
    ByVal dwFlags&, _
    ByVal dwContext&) As Long

Private Declare Function HttpSendRequest Lib "wininet" Alias "HttpSendRequestA" ( _
    ByVal hHttpRequest&, _
    ByVal lpszHeaders$, _
    ByVal dwHeadersLength&, _
    ByVal lpOptional$, _
    ByVal dwOptionalLength&) As Boolean

Private Declare Function InternetReadFile Lib "wininet" ( _
    ByVal hFile&, _
    ByVal lpBuffer$, _
    ByVal dwNumberOfBytesToRead&, _
    ByRef lpNumberOfBytesRead&) As Boolean

The trouble is that when I acquire HTML source that is encoded in UTF-8 Unicode, it does not return the correct content.
I assume that the variants of the api type ????????W could be used as an alternative, but I can't get any instance I have seen to work.
Can anyone supply source examples?


Avatar of PaulHews
PaulHews
Flag of Canada image

Can you post an example URL where the content is encoded and you are not returning the desired results?
Avatar of PECollingwood
PECollingwood

ASKER

Hi,
this URL is a prime example:

http://babelfish.altavista.com/babelfish/tr?tt=urltext&url=x&lp=en_pt&doit=done&urltext=dog

This is enquiring for a Portuguese translation of th word 'dog' using AltaVista's BabelFish portal.
If you paste this URL into your IE who'll see the translation is 'cao', with the letter a capped with a tilde (~).

If you attempt to acquire the HTML content using a call like this:

DownloadURLToString "babelfish.altavista.com", "/babelfish/tr?tt=urltext&url=x&lp=en_pt&doit=done&urltext=dog"

to the code below, the text content representing the translated word is different.
This can be proved by saving the string to a .html file and then browsing this using IE.

What's going on?

Thanks,
Paul

---------------------------------------------------------------------------------------------------------------------------------------------

Option Explicit

Private Const INTERNET_OPEN_TYPE_DIRECT As Long = 1

Private Const INTERNET_SERVICE_HTTP  As Long = 3

Private Const INTERNET_FLAG_NO_COOKIES As Long = &H80000
Private Const INTERNET_FLAG_NO_CACHE_WRITE As Long = &H4000000
Private Const INTERNET_FLAG_RELOAD = &H80000000

Private Declare Function InternetOpen Lib "wininet" Alias "InternetOpenA" ( _
   ByVal lpszAgent$, _
   ByVal dwAccessType&, _
   ByVal lpszProxyName$, _
   ByVal lpszProxyBypass$, _
   ByVal dwFlags&) As Long

Private Declare Function InternetCloseHandle Lib "wininet" ( _
   ByVal hEnumHandle&) As Long
     
Private Declare Function InternetConnect Lib "wininet" Alias "InternetConnectA" ( _
   ByVal internet_handle&, _
   ByVal lpszServerName$, _
   ByVal nServerPort&, _
   ByVal lpszUserName$, _
   ByVal lpszPassword$, _
   ByVal dwService&, _
   ByVal dwFlags&, _
   ByVal context_word&) As Long

Private Declare Function HttpOpenRequest Lib "wininet" Alias "HttpOpenRequestA" ( _
    ByVal hHttpSession&, _
    ByVal lpszVerb$, _
    ByVal lpszObjectName$, _
    ByVal lpszVersion$, _
    ByVal lpszReferer$, _
    ByVal lpszAcceptTypes$, _
    ByVal dwFlags&, _
    ByVal dwContext&) As Long

Private Declare Function HttpSendRequest Lib "wininet" Alias "HttpSendRequestA" ( _
    ByVal hHttpRequest&, _
    ByVal lpszHeaders$, _
    ByVal dwHeadersLength&, _
    ByVal lpOptional$, _
    ByVal dwOptionalLength&) As Boolean

Private Declare Function InternetQueryDataAvailable Lib "wininet" ( _
    ByVal hFile&, _
    ByRef lpdwNumberOfBytesAvailable&, _
    ByVal dwFlags&, _
    ByVal dwContext&) As Boolean

Private Declare Function InternetReadFile Lib "wininet" ( _
    ByVal hFile&, _
    ByVal lpBuffer$, _
    ByVal dwNumberOfBytesToRead&, _
    ByRef lpNumberOfBytesRead&) As Boolean


Public Function DownloadURLToString(ByVal server_url$, ByVal path_url$, ByRef return_string$) As Boolean
   Dim open_handle&, connect_handle&, request_handle&, result As Boolean, bytes_read&, total_string$
   Dim string_buffer As String * 1024
   
   On Error Resume Next
   
   DownloadURLToString = False
   
   open_handle = InternetOpen(App.Title, _
                              INTERNET_OPEN_TYPE_DIRECT, _
                              vbNullString, vbNullString, 0)
   
   If open_handle Then
   
      connect_handle = InternetConnect(open_handle, _
                                       server_url, _
                                       80, _
                                       "", "", _
                                       INTERNET_SERVICE_HTTP, _
                                       0, 0)
      If connect_handle Then
     
         request_handle = HttpOpenRequest(connect_handle, _
                                          "GET", _
                                          path_url, _
                                          "HTTP/1.0", vbNullString, vbNullString, _
                                          INTERNET_FLAG_NO_COOKIES Or INTERNET_FLAG_NO_CACHE_WRITE Or INTERNET_FLAG_RELOAD, _
                                          0)

         If request_handle Then

            result = HttpSendRequest(request_handle, vbNullString, 0, vbNullString, 0)
   
            If result Then
               
               Do
                  DoEvents
                  result = InternetReadFile(request_handle, string_buffer, Len(string_buffer), bytes_read)
                 
                  If result Then
                     If bytes_read > 0 Then
                        total_string = total_string & Left$(string_buffer, bytes_read)
                     End If
                  Else
                     Exit Do
                  End If
                 
               Loop While bytes_read > 0
   
               If result Then
                  return_string = total_string
               
                  DownloadURLToString = True
               End If
               
            End If
   
            InternetCloseHandle request_handle
         
         End If
   
         InternetCloseHandle connect_handle
         
      End If

      InternetCloseHandle open_handle

   End If
   
End Function


You can encode the string correctly as UTF8 before writing it to disk:

Option Explicit
Private Declare Function WideCharToMultiByte Lib "kernel32" _
    (ByVal CodePage As Long, _
     ByVal dwFlags As Long, _
     ByVal lpWideCharStr As Long, _
     ByVal cchWideChar As Long, _
     ByRef lpMultiByteStr As Any, _
     ByVal cchMultiByte As Long, _
     ByVal lpDefaultChar As String, _
     ByVal lpUsedDefaultChar As Long) As Long
Const CP_UTF8 = 65001



Private Sub Command1_Click()
    Dim strText As String
    Dim bytBuffer() As Byte
    Dim lngLen As Long
   
    'strText = OpenURL("http://babelfish.altavista.com/babelfish/tr?tt=urltext&url=x&lp=en_pt&doit=done&urltext=dog")
    DownloadURLToString "babelfish.altavista.com", "/babelfish/tr?tt=urltext&url=x&lp=en_pt&doit=done&urltext=dog", strText
   
    bytBuffer = Utf8Encode(strText)
    If UBound(bytBuffer) > -1 Then
        Open "C:\test\test.html" For Binary As #1
        Put #1, , bytBuffer
        Close
    End If
   
   
End Sub

Public Function Utf8Encode(ByRef strUnicode As String) As Byte()
    Dim lngRes As Long
    Dim b() As Byte
    Dim lngUnicodeLen As Long
    Dim lngBufferSize As Long
    ReDim b(-1 To -1)
   
    lngUnicodeLen = Len(strUnicode)
    If lngUnicodeLen > 0 Then
        lngBufferSize = lngUnicodeLen * 3 + 1  'ensure adequate space for UTF wide char
        ReDim b(lngBufferSize - 1)
        lngRes = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), lngUnicodeLen, b(0), lngBufferSize, vbNullString, 0)
        If lngRes Then
            'trim to correct length
            ReDim Preserve b(lngRes - 1)
        End If
        Utf8Encode = b
    End If
   
End Function


Sorry, change this:

Public Function Utf8Encode(ByRef strUnicode As String) As Byte()
    Dim lngRes As Long
    Dim b() As Byte
    Dim lngUnicodeLen As Long
    Dim lngBufferSize As Long
    ReDim b(-1 To -1)
   
    lngUnicodeLen = Len(strUnicode)
    If lngUnicodeLen > 0 Then
        lngBufferSize = lngUnicodeLen * 3 + 1  'ensure adequate space for UTF wide char
        ReDim b(lngBufferSize - 1)
        lngRes = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), lngUnicodeLen, b(0), lngBufferSize, vbNullString, 0)
        If lngRes Then
            'trim to correct length
            ReDim Preserve b(lngRes - 1)
        End If
    End If
    Utf8Encode = b
End Function
PaulHews,
Ok that's sorted out the Portuguese - can you pinpoint the problem here?

This URL:

http://babelfish.altavista.com/babelfish/tr?tt=urltext&url=x&lp=en_ko&doit=done&urltext=dog

This is enquiring for a Korean translation of th word 'dog' using AltaVista's BabelFish portal.

I'm getting garbage for this - even with your suggested solution. IE of course, shows Korean alphabet symbols correctly.

Can you confirm that you are as well?
Any ideas how I fix this?
ASKER CERTIFIED SOLUTION
Avatar of PaulHews
PaulHews
Flag of Canada image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
PaulHews,
thanks - solution found using your help!