Option Explicit
Dim WithEvents Web1 As InternetExplorer
Dim wdApp As Word.Application
Private Sub CommandButton1_Click()
Dim strHTML As String
Dim strURL As String
Set wdApp = CreateObject("Word.Application")
wdApp.Visible = True
Set Web1 = CreateObject("InternetExplorer.Application")
Web1.Visible = True
strURL = "http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Accueil_principal"
Web1.navigate strURL
End Sub
Function GetLanguage(strText As String) As String
Dim doc As Word.Document
wdApp.CheckLanguage = True
Set doc = Documents.Add
doc.Range.Paste
Select Case doc.Range.LanguageID
Case 2057
GetLanguage = "English"
Case 1036
GetLanguage = "French"
'Case 1033 ...
End Select
doc.Close wdDoNotSaveChanges
End Function
Private Sub Web1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
WriteLog "DocumentComplete"
If (pDisp Is Web1) Then
MsgBox GetLanguage(Web1.Document.body.innerText)
End If
End Sub
Private Sub Web1_StatusTextChange(ByVal Text As String)
WriteLog "StatusTextChange: " & Text
End Sub
Sub WriteLog(ByVal Text As String)
Dim f As Integer
Dim strFileName As String
strFileName = "web" & Format$(Now, "MMy") & ".log"
Text = Format$(Now, "HH:nn:ss") & " " & Text
Debug.Print Text
f = FreeFile
Open ActiveWorkbook.Path & "\" & strFileName For Append As #f
Print #f, Text
Close #f
End Sub
Private Sub CommandButton1_Click()
Dim strURL As String
If wdApp Is Nothing Then
On Error Resume Next
Set wdApp = GetObject(, "Word.Application")
On Error GoTo 0
If wdApp Is Nothing Then
Set wdApp = CreateObject("Word.Application")
wdApp.Visible = True
End If
End If
Set Web1 = CreateObject("InternetExplorer.Application")
Web1.Visible = True
strURL = "http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Accueil_principal"
Web1.navigate strURL
End Sub
Function GetLanguage(strText As String) As String
Dim doc As Word.Document
Dim p As Integer
Dim para As Paragraph
Dim i As Integer
Dim L As Integer
Dim LID As WdLanguageID
Dim Languages() As WdLanguageID
Dim Paracounts() As Integer
Dim iMax As Integer
Dim iMaxID As WdLanguageID
L = -1
wdApp.CheckLanguage = True
WriteLog "Length: " & Len(strText)
Set doc = Documents.Add
doc.Range.Text = strText
For Each para In doc.Paragraphs
LID = para.Range.LanguageID
For i = 0 To L
If LID = Languages(i) Then
Paracounts(i) = Paracounts(i) + 1
Exit For
End If
Next i
If i > L Then
ReDim Preserve Languages(i)
ReDim Preserve Paracounts(i)
Paracounts(i) = 1
Languages(i) = LID
L = i
End If
Next para
For i = 0 To L
If iMax < Paracounts(i) Then
iMax = Paracounts(i)
iMaxID = Languages(i)
End If
Next i
WriteLog "Count: " & iMax & ", ID: " & iMaxID
Select Case iMaxID
Case 2057
GetLanguage = "English"
Case 1036
GetLanguage = "French"
'Case 1033 ...
End Select
doc.Close wdDoNotSaveChanges
End Function
Private Sub Web1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
WriteLog "DocumentComplete"
If (pDisp Is Web1) Then
MsgBox GetLanguage(Web1.Document.body.innerText)
End If
End Sub
Private Sub Web1_StatusTextChange(ByVal Text As String)
WriteLog "StatusTextChange: " & Text
End Sub
Sub WriteLog(ByVal Text As String)
Dim f As Integer
Dim strFileName As String
strFileName = "web" & Format$(Now, "MMy") & ".log"
Text = Format$(Now, "HH:nn:ss") & " " & Text
Debug.Print Text
f = FreeFile
Open ActiveWorkbook.Path & "\" & strFileName For Append As #f
Print #f, Text
Close #f
End Sub
Option Explicit
Dim WithEvents Web1 As InternetExplorer
Dim wdApp As Word.Application
Dim r As Integer
Dim sh As Worksheet
Dim bNewWordApp As Boolean
Private Sub CommandButton1_Click()
Dim strURL As String
If wdApp Is Nothing Then
On Error Resume Next
Set wdApp = GetObject(, "Word.Application")
On Error GoTo 0
If wdApp Is Nothing Then
Set wdApp = CreateObject("Word.Application")
bNewWordApp = True
End If
End If
wdApp.Visible = True
Set Web1 = CreateObject("InternetExplorer.Application")
Web1.Visible = True
Set sh = ActiveWorkbook.Worksheets(3)
r = 2
strURL = sh.Cells(r, 1).value
Web1.navigate strURL
WriteLog "Navigating to " & strURL
End Sub
Function GetLanguage(strText As String) As String
Dim doc As Word.Document
Dim p As Integer
Dim para As Paragraph
Dim i As Integer
Dim L As Integer
Dim LID As WdLanguageID
Dim Languages() As WdLanguageID
Dim Paracounts() As Integer
Dim iMax As Integer
Dim iMaxID As WdLanguageID
WriteLog "GetLanguage. Text length: " & Len(strText)
L = -1
wdApp.CheckLanguage = True
Set doc = Documents.Add
doc.Range.Text = strText
doc.Range.CheckSpelling
doc.Range.Select
WriteLog "GetLanguage. Paragraph count: " & doc.Paragraphs.Count
For Each para In doc.Paragraphs
para.Range.Select 'seems necessary to provoke automatic language detecion
DoEvents
LID = para.Range.LanguageID
For i = 0 To L
If LID = Languages(i) Then
Paracounts(i) = Paracounts(i) + 1
Exit For
End If
Next i
If i > L Then
ReDim Preserve Languages(i)
ReDim Preserve Paracounts(i)
Paracounts(i) = 1
Languages(i) = LID
L = i
End If
Next para
For i = 0 To L
If iMax < Paracounts(i) Then
iMax = Paracounts(i)
iMaxID = Languages(i)
End If
Next i
WriteLog "Max language count: " & iMax & ", ID: " & iMaxID
'End If
Select Case iMaxID
'Select Case doc.Range.LanguageID
Case 2057, 1033, 3081
GetLanguage = "English"
Case 1036, 3084
GetLanguage = "French"
Case 1034, 3082
GetLanguage = "Spanish"
Case Else
GetLanguage = "Language ID: " & iMaxID & " not yet encoded in this macro"
End Select
doc.Close wdDoNotSaveChanges
End Function
Private Sub Web1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
Dim strURL As String
WriteLog "DocumentComplete: " & URL
If (pDisp Is Web1) Then
sh.Cells(r, 2).value = GetLanguage(Web1.Document.body.innerText)
r = r + 1
If sh.Cells(r, 1).value <> "" Then
strURL = sh.Cells(r, 1).value
WriteLog "Navigating to " & strURL
Web1.navigate strURL
Else
If bNewWordApp Then
wdApp.Quit
End If
End If
End If
End Sub
Private Sub Web1_StatusTextChange(ByVal Text As String)
WriteLog "StatusTextChange: " & Text
End Sub
Sub WriteLog(ByVal Text As String)
Dim f As Integer
Dim strFileName As String
strFileName = "web" & Format$(Now, "MMy") & ".log"
Text = Format$(Now, "HH:nn:ss") & " " & Text
Debug.Print Text
f = FreeFile
Open ActiveWorkbook.Path & "\" & strFileName For Append As #f
Print #f, Text
Close #f
End Sub
5.2.The Services and Site may not be used by scripts, machines or automated Services without express written consent of the Site ownership.Which is exactly what you are doing above...
Web Page Language Identification Based on URLs:
http://www.vldb.org/pvldb/1/1453880.pdf