Hepen
asked on
Return Base Domain from URL (String Stuff)
Example:
Dim strURL as string
strURL = "https://www.experts-exchange.com/Programming/Programming_Languages/Visual_Basic/askQuestion.jsp"
Msgbox ReturnBaseDomain(strURL)
How do I make the function ReturnBaseDomain to return only the https://www.experts-exchange.com part of the url.
Dim strURL as string
strURL = "https://www.experts-exchange.com/Programming/Programming_Languages/Visual_Basic/askQuestion.jsp"
Msgbox ReturnBaseDomain(strURL)
How do I make the function ReturnBaseDomain to return only the https://www.experts-exchange.com part of the url.
SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Hepen,
> Is there a way to make it so it doesn't return http:// or /
My post does that...
Regards,
Patrick
> Is there a way to make it so it doesn't return http:// or /
My post does that...
Regards,
Patrick
ASKER
Only problem is i have to use regex, i was hoping for a string manipulated way. Although I will use your code for another project for my server application.
If you want to use the API, then there is another way.
ASKER
what is the api way
SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Did you mean the "UrlGetPartA"/"UrlGetPart" API?
[ http://vbnet.mvps.org/index.html?code/internet/urlgetpart.htm ]
BFN,
fp.
[ http://vbnet.mvps.org/index.html?code/internet/urlgetpart.htm ]
BFN,
fp.
Well my first thought was InternetCrackUrl API
Hepen,
> Only problem is i have to use regex, i was hoping for a string manipulated way.
What's wrong with using RegEx? BTW, I found a pattern string that does away with the Replace:
SomeVariable = RegExpFind(strURL, "[a-z0-9-]{2,}\.[a-z0-9-]{2,}(\.[a-z0-9-]{2,}){0,}", 1, False)
That will work regardless of whether there are any forward slashes in the URL.
Regards,
Patrick
> Only problem is i have to use regex, i was hoping for a string manipulated way.
What's wrong with using RegEx? BTW, I found a pattern string that does away with the Replace:
SomeVariable = RegExpFind(strURL, "[a-z0-9-]{2,}\.[a-z0-9-]{2,}(\.[a-z0-9-]{2,}){0,}", 1, False)
That will work regardless of whether there are any forward slashes in the URL.
Regards,
Patrick
For example...
Option Explicit
Private Declare Function UrlGetPart _
Lib "shlwapi.dll" _
Alias "UrlGetPartA" _
(ByVal pszIn As String, _
ByVal pszOut As String, _
ByRef pcchOut As Long, _
ByVal dwPart As Long, _
ByVal dwFlags As Long) As Long
' Extracts one component of a URL by calling the shlwapi UrlGetPart API.
'
' strURL  - URL to examine; an empty string yields "".
' lngPart - part selector, passed through unchanged as the API's dwPart.
'
' Returns the text the API wrote (requested with lngURL_PARTFLAG_KEEPSCHEME),
' or "" when the API returns a non-zero result or a run-time error is trapped.
' NOTE: the output buffer is fixed at lngMAX_PATH characters.
Private Function strGet_URL_Part(ByVal strURL As String, _
                                 ByVal lngPart As Long) As String

    Const lngMAX_PATH As Long = 260&
    Const lngURL_PARTFLAG_KEEPSCHEME As Long = &H1

    Dim lngSize As Long
    Dim strPart As String
    Dim strReturn As String

    On Error GoTo Err_strGet_URL_Part

    strReturn = ""

    If Len(strURL) > 0 Then

        ' Pre-size the output buffer. lngSize is passed ByRef: it carries the
        ' buffer length in and is used as the result length afterwards.
        strPart = Space$(lngMAX_PATH)
        lngSize = Len(strPart)

        ' A return value of 0 is treated as success.
        If UrlGetPart(strURL, _
                      strPart, _
                      lngSize, _
                      lngPart, _
                      lngURL_PARTFLAG_KEEPSCHEME) = 0& Then
            strReturn = Left$(strPart, lngSize)
        End If

    End If

Exit_strGet_URL_Part:

    On Error Resume Next

    strGet_URL_Part = strReturn

    ' Leave here so the normal path never falls through into the error
    ' handler below (which would execute Resume with no active error).
    Exit Function

Err_strGet_URL_Part:

    On Error Resume Next

    strReturn = ""

    Resume Exit_strGet_URL_Part

End Function
' Demonstration: displays the host-name part of a sample URL in a message box.
Public Sub Test_GetURLPart()

    Dim strURL As String

    ' Any run-time error in the demo (e.g. Left$ with a negative length when
    ' the URL contains no "/") is deliberately ignored.
    On Error Resume Next

'   Const lngURL_PART_SCHEME As Long = 1&
    Const lngURL_PART_HOSTNAME As Long = 2&
'   Const lngURL_PART_USERNAME As Long = 3&
'   Const lngURL_PART_PASSWORD As Long = 4&
'   Const lngURL_PART_PORT As Long = 5&
'   Const lngURL_PART_QUERY As Long = 6&

    strURL = "https://www.experts-exchange.com/Programming/Programming_Languages/Visual_Basic/askQuestion.jsp"

    ' Remove the text that precedes the first "/" of the URL (its "scheme:"
    ' prefix) from the value returned by strGet_URL_Part.
    MsgBox Replace(strGet_URL_Part(strURL, lngURL_PART_HOSTNAME), Left$(strURL & "/", InStr(strURL, "/") - 1), ""), _
           vbInformation Or vbOKOnly

End Sub
BFN,
fp.
Option Explicit
Private Declare Function UrlGetPart _
Lib "shlwapi.dll" _
Alias "UrlGetPartA" _
(ByVal pszIn As String, _
ByVal pszOut As String, _
ByRef pcchOut As Long, _
ByVal dwPart As Long, _
ByVal dwFlags As Long) As Long
' Extracts one component of a URL by calling the shlwapi UrlGetPart API.
'
' strURL  - URL to examine; an empty string yields "".
' lngPart - part selector, passed through unchanged as the API's dwPart.
'
' Returns the text the API wrote (requested with lngURL_PARTFLAG_KEEPSCHEME),
' or "" when the API returns a non-zero result or a run-time error is trapped.
' NOTE: the output buffer is fixed at lngMAX_PATH characters.
Private Function strGet_URL_Part(ByVal strURL As String, _
                                 ByVal lngPart As Long) As String

    Const lngMAX_PATH As Long = 260&
    Const lngURL_PARTFLAG_KEEPSCHEME As Long = &H1

    Dim lngSize As Long
    Dim strPart As String
    Dim strReturn As String

    On Error GoTo Err_strGet_URL_Part

    strReturn = ""

    If Len(strURL) > 0 Then

        ' Pre-size the output buffer. lngSize is passed ByRef: it carries the
        ' buffer length in and is used as the result length afterwards.
        strPart = Space$(lngMAX_PATH)
        lngSize = Len(strPart)

        ' A return value of 0 is treated as success.
        If UrlGetPart(strURL, _
                      strPart, _
                      lngSize, _
                      lngPart, _
                      lngURL_PARTFLAG_KEEPSCHEME) = 0& Then
            strReturn = Left$(strPart, lngSize)
        End If

    End If

Exit_strGet_URL_Part:

    On Error Resume Next

    strGet_URL_Part = strReturn

    ' Leave here so the normal path never falls through into the error
    ' handler below (which would execute Resume with no active error).
    Exit Function

Err_strGet_URL_Part:

    On Error Resume Next

    strReturn = ""

    Resume Exit_strGet_URL_Part

End Function
' Demonstration: displays the host-name part of a sample URL in a message box.
Public Sub Test_GetURLPart()

    Dim strURL As String

    ' Any run-time error in the demo (e.g. Left$ with a negative length when
    ' the URL contains no "/") is deliberately ignored.
    On Error Resume Next

'   Const lngURL_PART_SCHEME As Long = 1&
    Const lngURL_PART_HOSTNAME As Long = 2&
'   Const lngURL_PART_USERNAME As Long = 3&
'   Const lngURL_PART_PASSWORD As Long = 4&
'   Const lngURL_PART_PORT As Long = 5&
'   Const lngURL_PART_QUERY As Long = 6&

    strURL = "https://www.experts-exchange.com/Programming/Programming_Languages/Visual_Basic/askQuestion.jsp"

    ' Remove the text that precedes the first "/" of the URL (its "scheme:"
    ' prefix) from the value returned by strGet_URL_Part.
    MsgBox Replace(strGet_URL_Part(strURL, lngURL_PART_HOSTNAME), Left$(strURL & "/", InStr(strURL, "/") - 1), ""), _
           vbInformation Or vbOKOnly

End Sub
BFN,
fp.
PS. egl1044's API suggestion:
[ http://msdn.microsoft.com/library/en-us/wininet/wininet/internetcrackurl.asp ]
And sample class file showing code for usage of...
InternetCanonicalizeUrl
InternetCrackUrl
InternetCreateUrl
[ http://vb.mvps.org/samples/project.asp?id=NetUrl ]
' *************************************************************************
' Copyright ©1998-2000 Karl E. Peterson
' All Rights Reserved, http://www.mvps.org/vb
' *************************************************************************
' You are free to use this code within your own applications, but you
' are expressly forbidden from selling or otherwise distributing this
' source code, non-compiled, without prior written consent.
' *************************************************************************
Option Explicit
'========================= ========== ========== ========
'=========== INTERNet URL DECLARATIONS ===============
'========================= ========== ========== ========
Private Declare Function InternetCrackUrl Lib "wininet.dll" Alias "InternetCrackUrlA" (ByVal lpszUrl As String, ByVal dwUrlLength As Long, ByVal dwFlags As Long, lpUrlComponents As URL_COMPONENTS) As Long
Private Declare Function InternetCanonicalizeUrl Lib "wininet.dll" Alias "InternetCanonicalizeUrlA" (ByVal lpszUrl As String, ByVal lpszBuffer As String, lpdwBufferLength As Long, ByVal dwFlags As Long) As Long
Private Declare Function InternetCreateUrl Lib "wininet.dll" Alias "InternetCreateUrlA" (lpUrlComponents As URL_COMPONENTS, ByVal dwFlags As Long, ByVal lpszUrl As String, lpdwUrlLength As Long) As Long
Private Type URL_COMPONENTS 'typedef struct {
StructSize As Long ' DWORD dwStructSize;
Scheme As String ' LPSTR lpszScheme;
SchemeLength As Long ' DWORD dwSchemeLength;
nScheme As Long ' INTERNET_SCHEME nScheme;
HostName As String ' LPSTR lpszHostName;
HostNameLength As Long ' DWORD dwHostNameLength;
nPort As Long ' INTERNET_PORT nPort;
UserName As String ' LPSTR lpszUserName;
UserNameLength As Long ' DWORD dwUserNameLength;
Password As String ' LPSTR lpszPassword;
PasswordLength As Long ' DWORD dwPasswordLength;
URLPath As String ' LPSTR lpszUrlPath;
UrlPathLength As Long ' DWORD dwUrlPathLength;
ExtraInfo As String ' LPSTR lpszExtraInfo;
ExtraInfoLength As Long ' DWORD dwExtraInfoLength;
End Type '} URL_COMPONENTS;
'
' Enumerated internet schemes
'
Public Enum InetSchemes
InternetSchemePartial = -2
InternetSchemeUnknown = -1
InternetSchemeDefault = 0
InternetSchemeFtp
InternetSchemeGopher
InternetSchemeHttp
InternetSchemeHttps
InternetSchemeFile
InternetSchemeNews
InternetSchemeMailto
InternetSchemeSocks
InternetSchemeFirst = InternetSchemeFtp
InternetSchemeLast = InternetSchemeSocks
End Enum
' Flags for InternetCrackUrl() and InternetCreateUrl()
Private Const ICU_ESCAPE = &H80000000 '// (un)escape URL characters
Private Const ICU_USERNAME = &H40000000 '// use internal username & password
' Flags for InternetCanonicalizeUrl() and InternetCombineUrl()
Private Const ICU_NO_ENCODE = &H20000000 '// Don't convert unsafe characters to escape sequence
Private Const ICU_DECODE = &H10000000 '// Convert %XX escape sequences to characters
Private Const ICU_NO_META = &H8000000 '// Don't convert .. etc. meta path sequences
Private Const ICU_ENCODE_SPACES_ONLY = &H4000000 '// Encode spaces only
Private Const ICU_BROWSER_MODE = &H2000000 '// Special encode/decode rules for browser
' Possible error codes
Private Const ERROR_INSUFFICIENT_BUFFER As Long = 122
' Set aside storage for private member variables.
Private m_Url As String
Private m_UrlCmp As URL_COMPONENTS
Private m_UrlCmpMT As URL_COMPONENTS
Private m_LastDllError As Long
' ************************** ********** ********
' Initialize
' ************************** ********** ********
' Runs when an instance is created: stamps the blank template structure
' (m_UrlCmpMT) with its own size.
Private Sub Class_Initialize()
    With m_UrlCmpMT
        .StructSize = Len(m_UrlCmpMT)
    End With
End Sub
' ************************** ********** ********
' Public Properties
' ************************** ********** ********
' Each string-valued setter below stores the new text in m_UrlCmp together
' with its length, so the text member and its length member stay in step.

Public Property Let ExtraInfo(ByVal NewVal As String)
    With m_UrlCmp
        .ExtraInfo = NewVal
        .ExtraInfoLength = Len(NewVal)
    End With
End Property

' Current ExtraInfo component.
Public Property Get ExtraInfo() As String
    ExtraInfo = m_UrlCmp.ExtraInfo
End Property

Public Property Let HostName(ByVal NewVal As String)
    With m_UrlCmp
        .HostName = NewVal
        .HostNameLength = Len(NewVal)
    End With
End Property

' Current HostName component.
Public Property Get HostName() As String
    HostName = m_UrlCmp.HostName
End Property

Public Property Let Password(ByVal NewVal As String)
    With m_UrlCmp
        .Password = NewVal
        .PasswordLength = Len(NewVal)
    End With
End Property

' Current Password component.
Public Property Get Password() As String
    Password = m_UrlCmp.Password
End Property

' Port is stored as-is in the nPort member.
Public Property Let Port(ByVal NewVal As Long)
    m_UrlCmp.nPort = NewVal
End Property

Public Property Get Port() As Long
    Port = m_UrlCmp.nPort
End Property

' Scheme is the numeric scheme (nScheme); the scheme text is exposed
' separately through SchemeName.
Public Property Let Scheme(ByVal NewVal As InetSchemes)
    m_UrlCmp.nScheme = NewVal
End Property

Public Property Get Scheme() As InetSchemes
    Scheme = m_UrlCmp.nScheme
End Property

Public Property Let URLPath(ByVal NewVal As String)
    With m_UrlCmp
        .URLPath = NewVal
        .UrlPathLength = Len(NewVal)
    End With
End Property

' Current URLPath component.
Public Property Get URLPath() As String
    URLPath = m_UrlCmp.URLPath
End Property

Public Property Let UserName(ByVal NewVal As String)
    With m_UrlCmp
        .UserName = NewVal
        .UserNameLength = Len(NewVal)
    End With
End Property

' Current UserName component.
Public Property Get UserName() As String
    UserName = m_UrlCmp.UserName
End Property

' Assigning Value remembers the raw text, resets the components to the
' blank template, clears the recorded error and then parses the text.
Public Property Let Value(ByVal NewVal As String)
    m_LastDllError = 0
    m_Url = NewVal
    m_UrlCmp = m_UrlCmpMT
    CrackUrl m_Url, m_UrlCmp
End Property

' Reading Value clears the recorded error and rebuilds the URL text from
' the current components.
Public Property Get Value() As String
Attribute Value.VB_UserMemId = 0
    m_LastDllError = 0
    Value = CreateUrl(m_UrlCmp)
End Property

' ********************************************
'  Public Properties // Read-Only
' ********************************************

' Error code most recently stored in m_LastDllError.
Public Property Get LastDllError() As Long
    LastDllError = m_LastDllError
End Property

' Scheme text held in the Scheme member.
Public Property Get SchemeName() As String
    SchemeName = m_UrlCmp.Scheme
End Property
' ************************** ********** ********
' Private Methods
' ************************** ********** ********
' Assembles a URL string from a component structure via InternetCreateUrl.
'
' cmp - components to combine; its StructSize member is refreshed first.
'
' Returns the assembled text (cut at the first NUL by TrimNull), or "" when
' the API fails. On failure the API error code is stored in m_LastDllError,
' including when the single larger-buffer retry also fails.
Private Function CreateUrl(cmp As URL_COMPONENTS) As String
    Dim Buffer As String
    Dim BufLen As Long
    Dim ApiError As Long

    ' prepare buffers
    Buffer = String$(1024, 0)
    BufLen = Len(Buffer)
    cmp.StructSize = Len(cmp)

    ' try to put it all together
    If InternetCreateUrl(cmp, ICU_ESCAPE, Buffer, BufLen) Then
        CreateUrl = TrimNull(Buffer)
        Exit Function
    End If

    ' Read the error code once, straight after the failing call.
    ApiError = Err.LastDllError

    If ApiError = ERROR_INSUFFICIENT_BUFFER Then
        ' BufLen is passed ByRef; presumably the API has replaced it with the
        ' size it needs. Resize to that value and try once more.
        Buffer = String$(BufLen, 0)
        If InternetCreateUrl(cmp, ICU_ESCAPE, Buffer, BufLen) Then
            CreateUrl = TrimNull(Buffer)
            Exit Function
        End If
        ' The retry failed as well: report that failure instead of hiding it.
        ApiError = Err.LastDllError
    End If

    m_LastDllError = ApiError
    Debug.Print "InternetCreateUrl error: " & m_LastDllError
End Function
' Canonicalizes a URL and splits it into its components.
'
' URL - text to parse (ByVal, so the caller's string is never modified).
' cmp - structure that receives the components. Each string member is given
'       a BufSize-character buffer before the call and is afterwards cut to
'       the value held in its matching length member.
'
' Returns True when InternetCrackUrl reports success. Returns False when
' either API call fails; the API error code is then stored in m_LastDllError.
Private Function CrackUrl(ByVal URL As String, cmp As URL_COMPONENTS) As Boolean
    Dim Buffer As String
    Dim BufLen As Long
    Const BufSize = 1024
    '
    ' Try to ensure a valid URL to begin with
    '
    Buffer = Space$(BufSize)
    BufLen = Len(Buffer)
    If InternetCanonicalizeUrl(URL, Buffer, BufLen, ICU_BROWSER_MODE) Then
        URL = Left$(Buffer, BufLen)
        '
        ' Reset structure and supply buffers
        '
        With cmp
            .StructSize = Len(cmp)
            .Scheme = Space$(BufSize)
            .SchemeLength = BufSize
            .nScheme = InternetSchemeUnknown
            .HostName = Space$(BufSize)
            .HostNameLength = BufSize
            .nPort = 0
            .UserName = Space$(BufSize)
            .UserNameLength = BufSize
            .Password = Space$(BufSize)
            .PasswordLength = BufSize
            .URLPath = Space$(BufSize)
            .UrlPathLength = BufSize
            .ExtraInfo = Space$(BufSize)
            .ExtraInfoLength = BufSize
        End With
        '
        ' Crack URL apart and get what we can from it.
        ' Note: API requires Len(URL) param to include trailing null!
        '
        CrackUrl = CBool(InternetCrackUrl(URL, Len(URL) + 1, ICU_ESCAPE, cmp))
        If Not CrackUrl Then
            ' Make the failure visible through the LastDllError property.
            m_LastDllError = Err.LastDllError
        End If
        '
        ' Cut every buffer down to the length recorded in the structure
        '
        With cmp
            .Scheme = Left$(.Scheme, .SchemeLength)
            .HostName = Left$(.HostName, .HostNameLength)
            .UserName = Left$(.UserName, .UserNameLength)
            .Password = Left$(.Password, .PasswordLength)
            .URLPath = Left$(.URLPath, .UrlPathLength)
            .ExtraInfo = Left$(.ExtraInfo, .ExtraInfoLength)
        End With
    Else
        ' Canonicalization failed: record why and report failure.
        m_LastDllError = Err.LastDllError
        CrackUrl = False
    End If
End Function
' Returns the part of StrIn that precedes its first NUL character.
' When StrIn contains no NUL, leading and trailing spaces are trimmed instead.
Private Function TrimNull(ByVal StrIn As String) As String
    Dim cutAt As Long

    cutAt = InStr(1, StrIn, vbNullChar)

    If cutAt = 0 Then
        ' No terminator present: plain trim.
        TrimNull = Trim$(StrIn)
    Else
        ' cutAt = 1 (NUL is the first character) yields Left$(StrIn, 0) = "".
        TrimNull = Left$(StrIn, cutAt - 1)
    End If
End Function
BFN,
fp.
[ http://msdn.microsoft.com/library/en-us/wininet/wininet/internetcrackurl.asp ]
And sample class file showing code for usage of...
InternetCanonicalizeUrl
InternetCrackUrl
InternetCreateUrl
[ http://vb.mvps.org/samples/project.asp?id=NetUrl ]
' *************************************************************************
' Copyright ©1998-2000 Karl E. Peterson
' All Rights Reserved, http://www.mvps.org/vb
' *************************************************************************
' You are free to use this code within your own applications, but you
' are expressly forbidden from selling or otherwise distributing this
' source code, non-compiled, without prior written consent.
' *************************************************************************
Option Explicit
'=========================
'=========== INTERNet URL DECLARATIONS ===============
'=========================
Private Declare Function InternetCrackUrl Lib "wininet.dll" Alias "InternetCrackUrlA" (ByVal lpszUrl As String, ByVal dwUrlLength As Long, ByVal dwFlags As Long, lpUrlComponents As URL_COMPONENTS) As Long
Private Declare Function InternetCanonicalizeUrl Lib "wininet.dll" Alias "InternetCanonicalizeUrlA" (ByVal lpszUrl As String, ByVal lpszBuffer As String, lpdwBufferLength As Long, ByVal dwFlags As Long) As Long
Private Declare Function InternetCreateUrl Lib "wininet.dll" Alias "InternetCreateUrlA" (lpUrlComponents As URL_COMPONENTS, ByVal dwFlags As Long, ByVal lpszUrl As String, lpdwUrlLength As Long) As Long
Private Type URL_COMPONENTS 'typedef struct {
StructSize As Long ' DWORD dwStructSize;
Scheme As String ' LPSTR lpszScheme;
SchemeLength As Long ' DWORD dwSchemeLength;
nScheme As Long ' INTERNET_SCHEME nScheme;
HostName As String ' LPSTR lpszHostName;
HostNameLength As Long ' DWORD dwHostNameLength;
nPort As Long ' INTERNET_PORT nPort;
UserName As String ' LPSTR lpszUserName;
UserNameLength As Long ' DWORD dwUserNameLength;
Password As String ' LPSTR lpszPassword;
PasswordLength As Long ' DWORD dwPasswordLength;
URLPath As String ' LPSTR lpszUrlPath;
UrlPathLength As Long ' DWORD dwUrlPathLength;
ExtraInfo As String ' LPSTR lpszExtraInfo;
ExtraInfoLength As Long ' DWORD dwExtraInfoLength;
End Type '} URL_COMPONENTS;
'
' Enumerated internet schemes
'
Public Enum InetSchemes
InternetSchemePartial = -2
InternetSchemeUnknown = -1
InternetSchemeDefault = 0
InternetSchemeFtp
InternetSchemeGopher
InternetSchemeHttp
InternetSchemeHttps
InternetSchemeFile
InternetSchemeNews
InternetSchemeMailto
InternetSchemeSocks
InternetSchemeFirst = InternetSchemeFtp
InternetSchemeLast = InternetSchemeSocks
End Enum
' Flags for InternetCrackUrl() and InternetCreateUrl()
Private Const ICU_ESCAPE = &H80000000 '// (un)escape URL characters
Private Const ICU_USERNAME = &H40000000 '// use internal username & password
' Flags for InternetCanonicalizeUrl() and InternetCombineUrl()
Private Const ICU_NO_ENCODE = &H20000000 '// Don't convert unsafe characters to escape sequence
Private Const ICU_DECODE = &H10000000 '// Convert %XX escape sequences to characters
Private Const ICU_NO_META = &H8000000 '// Don't convert .. etc. meta path sequences
Private Const ICU_ENCODE_SPACES_ONLY = &H4000000 '// Encode spaces only
Private Const ICU_BROWSER_MODE = &H2000000 '// Special encode/decode rules for browser
' Possible error codes
Private Const ERROR_INSUFFICIENT_BUFFER As Long = 122
' Set aside storage for private member variables.
Private m_Url As String
Private m_UrlCmp As URL_COMPONENTS
Private m_UrlCmpMT As URL_COMPONENTS
Private m_LastDllError As Long
' **************************
' Initialize
' **************************
' Runs when an instance is created: stamps the blank template structure
' (m_UrlCmpMT) with its own size.
Private Sub Class_Initialize()
    With m_UrlCmpMT
        .StructSize = Len(m_UrlCmpMT)
    End With
End Sub
' **************************
' Public Properties
' **************************
' Each string-valued setter below stores the new text in m_UrlCmp together
' with its length, so the text member and its length member stay in step.

Public Property Let ExtraInfo(ByVal NewVal As String)
    With m_UrlCmp
        .ExtraInfo = NewVal
        .ExtraInfoLength = Len(NewVal)
    End With
End Property

' Current ExtraInfo component.
Public Property Get ExtraInfo() As String
    ExtraInfo = m_UrlCmp.ExtraInfo
End Property

Public Property Let HostName(ByVal NewVal As String)
    With m_UrlCmp
        .HostName = NewVal
        .HostNameLength = Len(NewVal)
    End With
End Property

' Current HostName component.
Public Property Get HostName() As String
    HostName = m_UrlCmp.HostName
End Property

Public Property Let Password(ByVal NewVal As String)
    With m_UrlCmp
        .Password = NewVal
        .PasswordLength = Len(NewVal)
    End With
End Property

' Current Password component.
Public Property Get Password() As String
    Password = m_UrlCmp.Password
End Property

' Port is stored as-is in the nPort member.
Public Property Let Port(ByVal NewVal As Long)
    m_UrlCmp.nPort = NewVal
End Property

Public Property Get Port() As Long
    Port = m_UrlCmp.nPort
End Property

' Scheme is the numeric scheme (nScheme); the scheme text is exposed
' separately through SchemeName.
Public Property Let Scheme(ByVal NewVal As InetSchemes)
    m_UrlCmp.nScheme = NewVal
End Property

Public Property Get Scheme() As InetSchemes
    Scheme = m_UrlCmp.nScheme
End Property

Public Property Let URLPath(ByVal NewVal As String)
    With m_UrlCmp
        .URLPath = NewVal
        .UrlPathLength = Len(NewVal)
    End With
End Property

' Current URLPath component.
Public Property Get URLPath() As String
    URLPath = m_UrlCmp.URLPath
End Property

Public Property Let UserName(ByVal NewVal As String)
    With m_UrlCmp
        .UserName = NewVal
        .UserNameLength = Len(NewVal)
    End With
End Property

' Current UserName component.
Public Property Get UserName() As String
    UserName = m_UrlCmp.UserName
End Property

' Assigning Value remembers the raw text, resets the components to the
' blank template, clears the recorded error and then parses the text.
Public Property Let Value(ByVal NewVal As String)
    m_LastDllError = 0
    m_Url = NewVal
    m_UrlCmp = m_UrlCmpMT
    CrackUrl m_Url, m_UrlCmp
End Property

' Reading Value clears the recorded error and rebuilds the URL text from
' the current components.
Public Property Get Value() As String
Attribute Value.VB_UserMemId = 0
    m_LastDllError = 0
    Value = CreateUrl(m_UrlCmp)
End Property

' ********************************************
'  Public Properties // Read-Only
' ********************************************

' Error code most recently stored in m_LastDllError.
Public Property Get LastDllError() As Long
    LastDllError = m_LastDllError
End Property

' Scheme text held in the Scheme member.
Public Property Get SchemeName() As String
    SchemeName = m_UrlCmp.Scheme
End Property
' **************************
' Private Methods
' **************************
' Assembles a URL string from a component structure via InternetCreateUrl.
'
' cmp - components to combine; its StructSize member is refreshed first.
'
' Returns the assembled text (cut at the first NUL by TrimNull), or "" when
' the API fails. On failure the API error code is stored in m_LastDllError,
' including when the single larger-buffer retry also fails.
Private Function CreateUrl(cmp As URL_COMPONENTS) As String
    Dim Buffer As String
    Dim BufLen As Long
    Dim ApiError As Long

    ' prepare buffers
    Buffer = String$(1024, 0)
    BufLen = Len(Buffer)
    cmp.StructSize = Len(cmp)

    ' try to put it all together
    If InternetCreateUrl(cmp, ICU_ESCAPE, Buffer, BufLen) Then
        CreateUrl = TrimNull(Buffer)
        Exit Function
    End If

    ' Read the error code once, straight after the failing call.
    ApiError = Err.LastDllError

    If ApiError = ERROR_INSUFFICIENT_BUFFER Then
        ' BufLen is passed ByRef; presumably the API has replaced it with the
        ' size it needs. Resize to that value and try once more.
        Buffer = String$(BufLen, 0)
        If InternetCreateUrl(cmp, ICU_ESCAPE, Buffer, BufLen) Then
            CreateUrl = TrimNull(Buffer)
            Exit Function
        End If
        ' The retry failed as well: report that failure instead of hiding it.
        ApiError = Err.LastDllError
    End If

    m_LastDllError = ApiError
    Debug.Print "InternetCreateUrl error: " & m_LastDllError
End Function
' Canonicalizes a URL and splits it into its components.
'
' URL - text to parse (ByVal, so the caller's string is never modified).
' cmp - structure that receives the components. Each string member is given
'       a BufSize-character buffer before the call and is afterwards cut to
'       the value held in its matching length member.
'
' Returns True when InternetCrackUrl reports success. Returns False when
' either API call fails; the API error code is then stored in m_LastDllError.
Private Function CrackUrl(ByVal URL As String, cmp As URL_COMPONENTS) As Boolean
    Dim Buffer As String
    Dim BufLen As Long
    Const BufSize = 1024
    '
    ' Try to ensure a valid URL to begin with
    '
    Buffer = Space$(BufSize)
    BufLen = Len(Buffer)
    If InternetCanonicalizeUrl(URL, Buffer, BufLen, ICU_BROWSER_MODE) Then
        URL = Left$(Buffer, BufLen)
        '
        ' Reset structure and supply buffers
        '
        With cmp
            .StructSize = Len(cmp)
            .Scheme = Space$(BufSize)
            .SchemeLength = BufSize
            .nScheme = InternetSchemeUnknown
            .HostName = Space$(BufSize)
            .HostNameLength = BufSize
            .nPort = 0
            .UserName = Space$(BufSize)
            .UserNameLength = BufSize
            .Password = Space$(BufSize)
            .PasswordLength = BufSize
            .URLPath = Space$(BufSize)
            .UrlPathLength = BufSize
            .ExtraInfo = Space$(BufSize)
            .ExtraInfoLength = BufSize
        End With
        '
        ' Crack URL apart and get what we can from it.
        ' Note: API requires Len(URL) param to include trailing null!
        '
        CrackUrl = CBool(InternetCrackUrl(URL, Len(URL) + 1, ICU_ESCAPE, cmp))
        If Not CrackUrl Then
            ' Make the failure visible through the LastDllError property.
            m_LastDllError = Err.LastDllError
        End If
        '
        ' Cut every buffer down to the length recorded in the structure
        '
        With cmp
            .Scheme = Left$(.Scheme, .SchemeLength)
            .HostName = Left$(.HostName, .HostNameLength)
            .UserName = Left$(.UserName, .UserNameLength)
            .Password = Left$(.Password, .PasswordLength)
            .URLPath = Left$(.URLPath, .UrlPathLength)
            .ExtraInfo = Left$(.ExtraInfo, .ExtraInfoLength)
        End With
    Else
        ' Canonicalization failed: record why and report failure.
        m_LastDllError = Err.LastDllError
        CrackUrl = False
    End If
End Function
' Returns the part of StrIn that precedes its first NUL character.
' When StrIn contains no NUL, leading and trailing spaces are trimmed instead.
Private Function TrimNull(ByVal StrIn As String) As String
    Dim cutAt As Long

    cutAt = InStr(1, StrIn, vbNullChar)

    If cutAt = 0 Then
        ' No terminator present: plain trim.
        TrimNull = Trim$(StrIn)
    Else
        ' cutAt = 1 (NUL is the first character) yields Left$(StrIn, 0) = "".
        TrimNull = Left$(StrIn, cutAt - 1)
    End If
End Function
BFN,
fp.
Nigel,
Would you be so kind as to see if I am missing anything in my RegExp pattern string? It is trying to match:
[two or more characters being any number, letter, or hyphen] + [period] + [two or more characters being any number, letter, or hyphen] +
[a period followed by two or more characters being any number, letter, or hyphen, this entire block repeated zero or more times]
That sounds like a valid answer to me, without getting *really* complicated (I know, for example, that this does not fit the strictest rules
for the top-level domains...).
Regards,
Patrick
Would you be so kind as to see if I am missing anything in my RegExp pattern string? It is trying to match:
[two or more characters being any number, letter, or hyphen] + [period] + [two or more characters being any number, letter, or hyphen] +
[a period followed by two or more characters being any number, letter, or hyphen, this entire block repeated zero or more times]
That sounds like a valid answer to me, without getting *really* complicated (I know, for example, that this does not fit the strictest rules
for the top-level domains...).
Regards,
Patrick
Heres the example I came up with
Option Explicit
Private Type URL_COMPONENTS
StructSize As Long
Scheme As String
SchemeLength As Long
nScheme As Long
HostName As String
HostNameLength As Long
nPort As Long
UserName As String
UserNameLength As Long
Password As String
PasswordLength As Long
URLPath As String
UrlPathLength As Long
ExtraInfo As String
ExtraInfoLength As Long
End Type
Private Declare Function InternetCrackUrlA Lib "wininet.dll" ( _
ByVal lpszUrl As String, _
ByVal dwUrlLength As Long, _
ByVal dwFlags As Long, _
lpUrlComponents As URL_COMPONENTS) As Long
Dim uCmp As URL_COMPONENTS
' Returns the host-name portion of a URL using the wininet InternetCrackUrlA
' API.
'
' url - URL to parse.
'
' Returns the text written to the HostName buffer up to its first NUL, or ""
' when the API returns 0 or no NUL is found in the buffer.
Public Function CrackUrl(ByVal url As String) As String
    Dim nulPos As Long

    With uCmp
        .StructSize = Len(uCmp)

        ' Only the host-name member is given a buffer.
        .HostName = Space$(1024)
        .HostNameLength = 1024

        ' &H80000000 is the dwFlags argument.
        If InternetCrackUrlA(url, Len(url), &H80000000, uCmp) <> 0 Then
            nulPos = InStr(1, .HostName, Chr$(0))
            ' Without this guard a missing NUL gives Left$(..., -1), which
            ' raises run-time error 5.
            If nulPos > 0 Then CrackUrl = Left$(.HostName, nulPos - 1)
        End If
    End With
End Function
' Button handler: parses a sample URL and shows the result in a message box.
Private Sub Command1_Click()
    Dim hostText As String

    hostText = CrackUrl("http://www.google.com/Programming/")
    MsgBox hostText
End Sub
Option Explicit
Private Type URL_COMPONENTS
StructSize As Long
Scheme As String
SchemeLength As Long
nScheme As Long
HostName As String
HostNameLength As Long
nPort As Long
UserName As String
UserNameLength As Long
Password As String
PasswordLength As Long
URLPath As String
UrlPathLength As Long
ExtraInfo As String
ExtraInfoLength As Long
End Type
Private Declare Function InternetCrackUrlA Lib "wininet.dll" ( _
ByVal lpszUrl As String, _
ByVal dwUrlLength As Long, _
ByVal dwFlags As Long, _
lpUrlComponents As URL_COMPONENTS) As Long
Dim uCmp As URL_COMPONENTS
' Returns the host-name portion of a URL using the wininet InternetCrackUrlA
' API.
'
' url - URL to parse.
'
' Returns the text written to the HostName buffer up to its first NUL, or ""
' when the API returns 0 or no NUL is found in the buffer.
Public Function CrackUrl(ByVal url As String) As String
    Dim nulPos As Long

    With uCmp
        .StructSize = Len(uCmp)

        ' Only the host-name member is given a buffer.
        .HostName = Space$(1024)
        .HostNameLength = 1024

        ' &H80000000 is the dwFlags argument.
        If InternetCrackUrlA(url, Len(url), &H80000000, uCmp) <> 0 Then
            nulPos = InStr(1, .HostName, Chr$(0))
            ' Without this guard a missing NUL gives Left$(..., -1), which
            ' raises run-time error 5.
            If nulPos > 0 Then CrackUrl = Left$(.HostName, nulPos - 1)
        End If
    End With
End Function
' Button handler: parses a sample URL and shows the result in a message box.
Private Sub Command1_Click()
    Dim hostText As String

    hostText = CrackUrl("http://www.google.com/Programming/")
    MsgBox hostText
End Sub
Who's Nigel
Hepen,
Will your text *always* start with "http://"? My RegExp-enabled function will cope with that eventuality just fine,
but I am not sure the API-based methods will...
(Meaning no disrespect at all toward Nigel or egl1044, of course!)
Regards,
Patrick
Will your text *always* start with "http://"? My RegExp-enabled function will cope with that eventuality just fine,
but I am not sure the API-based methods will...
(Meaning no disrespect at all toward Nigel or egl1044, of course!)
Regards,
Patrick
egl1044,
> Who's Nigel
fanpages = Nigel
:)
Regards,
Patrick
> Who's Nigel
fanpages = Nigel
:)
Regards,
Patrick
Haha, OK. Well, the author has three ways to do this now, so I think he got his answer.
egl1044,
> haha ok, well author has 3 ways to do this now so I think he got his answere.
LOL!
Regards,
Patrick
> haha ok, well author has 3 ways to do this now so I think he got his answere.
LOL!
Regards,
Patrick
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Mark,
> why always go for the api or the difficult way?
Your method will work just fine, but *only* if the string we want is *always* bounded by "//" on the left and "/" on
the right. The Asker gave us one example, not a rule that it would always follow that example.
Regards,
Patrick
> why always go for the api or the difficult way?
Your method will work just fine, but *only* if the string we want is *always* bounded by "//" on the left and "/" on
the right. The Asker gave us one example, not a rule that it would always follow that example.
Regards,
Patrick
Why not just Split()? I don't see any obvious faults with that concept, unless user&pass/port are tagged to the syntax.
Option Explicit
' Demo: shows ReturnBaseDomain's result for five sample URLs, one message box
' per URL, in the order listed.
Private Sub Form_Load()
    Dim samples As Variant
    Dim i As Long
    Dim sampleUrl As String

    samples = Array( _
        "https://www.experts-exchange.com/Programming/askQuestion.jsp", _
        "ftp://www.experts-exchange.com/Programming/askQuestion.jsp", _
        "https://www.experts-exchange.com/askQuestion.jsp", _
        "https://www.experts-exchange.com", _
        "https://www.experts-exchange.com/")

    For i = LBound(samples) To UBound(samples)
        ' ReturnBaseDomain takes its argument ByRef As String, so copy the
        ' Variant element into a String variable first.
        sampleUrl = samples(i)
        MsgBox ReturnBaseDomain(sampleUrl)
    Next i
End Sub
' Returns the host portion of a URL by splitting it on "/".
'
' url - text to examine (not modified).
'
' When the first segment ends in ":" (e.g. "https:") the host is the third
' segment; otherwise the first segment is returned. Returns "" for an empty
' string and for scheme-only input such as "http:" instead of raising
' "Subscript out of range". Any user:password@ prefix or :port suffix is
' left in place.
Private Function ReturnBaseDomain(ByRef url As String) As String
    Dim s() As String

    ' Split("") returns an array with no elements, so s(0) would fail.
    If Len(url) = 0 Then Exit Function

    s = Split(url, "/")

    If (Right$(s(0), 1) = ":") Then
        ' "scheme://host/..." splits into "scheme:", "", "host", ...
        If UBound(s) >= 2 Then ReturnBaseDomain = s(2)
    Else
        ReturnBaseDomain = s(0)
    End If
End Function
Option Explicit
' Demo: shows ReturnBaseDomain's result for five sample URLs, one message box
' per URL.
Private Sub Form_Load()
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com/Programming/askQuestion.jsp"))
    Call MsgBox(ReturnBaseDomain("ftp://www.experts-exchange.com/Programming/askQuestion.jsp"))
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com/askQuestion.jsp"))
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com"))
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com/"))
End Sub
' Returns the host portion of a URL by splitting it on "/".
'
' url - text to examine (not modified).
'
' When the first segment ends in ":" (e.g. "https:") the host is the third
' segment; otherwise the first segment is returned. Returns "" for an empty
' string and for scheme-only input such as "http:" instead of raising
' "Subscript out of range". Any user:password@ prefix or :port suffix is
' left in place.
Private Function ReturnBaseDomain(ByRef url As String) As String
    Dim s() As String

    ' Split("") returns an array with no elements, so s(0) would fail.
    If Len(url) = 0 Then Exit Function

    s = Split(url, "/")

    If (Right$(s(0), 1) = ":") Then
        ' "scheme://host/..." splits into "scheme:", "", "host", ...
        If UBound(s) >= 2 Then ReturnBaseDomain = s(2)
    Else
        ReturnBaseDomain = s(0)
    End If
End Function
The "Split" approach should work just fine as long as you could detect for the presence of "/" or "\" (depending on how the URL was presented/specified in the originating environment; e.g. DOS/Windows or UNIX based).
PS. With regards stripping the optional prefix of "Scheme" ("http://" "ftp://" "https://" "file://" "telnet://" etc)
Note the use of the Replace(...) function in my code demonstrating the strGet_URL_Part(...) method:
MsgBox Replace(strGet_URL_Part(strURL, lngURL_PART_HOSTNAME), Left$(strURL & "/", InStr(strURL, "/") - 1), ""), _
vbInformation Or vbOKOnly
PPS. Patrick...
"Would you be so kind as to see if I am missing anything in my RegExp pattern string? It is trying to match:
[two or more characters being any number, letter, or hyphen] + [period] + [two or more characters being any number, letter, or hyphen] +
[a period followed by two or more characters being any number, letter, or hyphen, this entire block repeated zero or more times]
That sounds like a valid answer to me, without getting *really* complicated (I know, for example, that this does not fit the strictest rules for the top-level domains...)."
I have seen a huge 'regexp' string for e-mail validation in my surfin' travels, & I'm sure there was a similar one for URL parsing... I'll see if I can find it again.
BFN,
fp.
PS. With regards stripping the optional prefix of "Scheme" ("http://" "ftp://" "https://" "file://" "telnet://" etc)
Note the use of the Replace(...) function in my code demonstrating the strGet_URL_Part(...) method:
MsgBox Replace(strGet_URL_Part(strURL, lngURL_PART_HOSTNAME), Left$(strURL & "/", InStr(strURL, "/") - 1), ""), _
vbInformation Or vbOKOnly
PPS. Patrick...
"Would you be so kind as to see if I am missing anything in my RegExp pattern string? It is trying to match:
[two or more characters being any number, letter, or hyphen] + [period] + [two or more characters being any number, letter, or hyphen] +
[a period followed by two or more characters being any number, letter, or hyphen, this entire block repeated zero or more times]
That sounds like a valid answer to me, without getting *really* complicated (I know, for example, that this does not fit the strictest rules for the top-level domains...)."
I have seen a huge 'regexp' string for e-mail validation in my surfin' travels, & I'm sure there was a similar one for URL parsing... I'll see if I can find it again.
BFN,
fp.
PPPS. [!] Mark_FreeSoftware:
' Reduces a URL to its host portion using plain string functions.
'
' str - URL text, with or without a leading "scheme://".
'
' Returns the text between the leading "//" (when present) and the next "/".
' A "//" is only treated as the scheme separator when it is the first slash
' in the text, so a "//" later in the path or query (for example
' "www.test.com/go?u=http://other.com") no longer replaces the real host.
Private Function TrimURL(ByVal str As String) As String
    Dim n As Long
    Dim firstSlash As Long

    firstSlash = InStr(str, "/")
    n = InStr(str, "//")

    ' Drop everything up to and including the leading "//".
    If n > 0 And n = firstSlash Then str = Mid$(str, n + 2)

    ' Drop the path: everything from the next "/" onwards.
    n = InStr(str, "/")
    If n > 0 Then str = Left$(str, n - 1)

    TrimURL = str
End Function
BFN,
fp.
' Reduces a URL to its host portion using plain string functions.
'
' str - URL text, with or without a leading "scheme://".
'
' Returns the text between the leading "//" (when present) and the next "/".
' A "//" is only treated as the scheme separator when it is the first slash
' in the text, so a "//" later in the path or query (for example
' "www.test.com/go?u=http://other.com") no longer replaces the real host.
Private Function TrimURL(ByVal str As String) As String
    Dim n As Long
    Dim firstSlash As Long

    firstSlash = InStr(str, "/")
    n = InStr(str, "//")

    ' Drop everything up to and including the leading "//".
    If n > 0 And n = firstSlash Then str = Mid$(str, n + 2)

    ' Drop the path: everything from the next "/" onwards.
    n = InStr(str, "/")
    If n > 0 Then str = Left$(str, n - 1)

    TrimURL = str
End Function
BFN,
fp.
Hi again Patrick,
Here's a few 'regexp' examples for URL parsing:
[ http://textsnippets.com/posts/show/523 ]
"^((http|https):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+\.[^#?\s]+)(.*?)(#[\w\-]+)?$"
(Obviously need to add more "protocol scheme" prefixes to that, e.g. "mailto", "ftp", "ftps", "file", "telnet", "ldap", "news", "gopher", "data", "finger", "local", "local-cgi", "about", and any more I've missed!)
(More searching...)
Comprehensive list here:
[ http://www.iana.org/assignments/uri-schemes.html ]
Further reading here:
[ http://esw.w3.org/topic/UriSchemes ]
[ http://www.faqs.org/rfcs/rfc1738.html ]
PS. Anybody good with converting Python scripts to VB(A)?
[ http://docs.python.org/lib/module-urlparse.html ]
and
[ http://mail.python.org/pipermail/doc-sig/2001-March/001456.html ]
r'\b((?:http|ftp|https|mailto)://[\w@&#-_.!~*();]+\b/?)'
BFN,
fp.
Here's a few 'regexp' examples for URL parsing:
[ http://textsnippets.com/posts/show/523 ]
"^((http|https):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+\.[^#?\s]+)(.*?)(#[\w\-]+)?$"
(Obviously need to add more "protocol scheme" prefixes to that, e.g. "mailto", "ftp", "ftps", "file", "telnet", "ldap", "news", "gopher", "data", "finger", "local", "local-cgi", "about", and any more I've missed!)
(More searching...)
Comprehensive list here:
[ http://www.iana.org/assignments/uri-schemes.html ]
Further reading here:
[ http://esw.w3.org/topic/UriSchemes ]
[ http://www.faqs.org/rfcs/rfc1738.html ]
PS. Anybody good with converting Python scripts to VB(A)?
[ http://docs.python.org/lib/module-urlparse.html ]
and
[ http://mail.python.org/pipermail/doc-sig/2001-March/001456.html ]
r'\b((?:http|ftp|https|mailto)://[\w@&#-_.!~*();]+\b/?)'
BFN,
fp.
Thanks for asking & subsequently closing the question, Hepen.
Hopefully all the options should prove to be a valuable resource for future question askers/Experts alike :)
BFN,
fp.
Hopefully all the options should prove to be a valuable resource for future question askers/Experts alike :)
BFN,
fp.
lol fanpages, that's like a quadropost ;)
matthewspatrick,
> Your method will work just fine, but *only* if the string we want is *always* bounded by "//" on the left and "/" on
> the right. The Asker gave us one example, not a rule that it would always follow that example.
no, it works fine without
try it !
(i did, with these urls:
http://www.test.com
http://www.test.com/test
www.test.com
www.test.com/test)
and they all worked
Hi Mark,
Thanks... erm, I think. "quadropost"? A phrase I'm not familiar with :(
Is that German or another great divide of the language used by both UK & US cousins?
BFN,
fp.
Thanks... erm, I think. "quadropost"? A phrase I'm not familiar with :(
Is that German or another great divide of the language used by both UK & US cousins?
BFN,
fp.
hehe it's not german, it's....
err i don't know
quadro is italian i think it means 4
and no i'm not an italian ;)
PS. The e-mail validation "regexp" I was referring to:
In a word, OUCH!
[ http://regular-expressions.com/email.html ]
(?:(?:\r\n)?[ \t])*(?:(?:(?:[^()<>@,;:\\ ".\[\] \x00-\x1F]+(?:(?:
(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| "(?:[^\"\r \\]|\\.
|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t]
)*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=
[\["()<>@,;:\\".\[\]]))|"( ?:[^\"\r\\ ]|\\.|(?:( ?:\r\n)?[ \t]))*"(
?:(?:\r\n)?[ \t])*))*@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \
x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \
[([^\[\]\r\\]|\\.)*\](?:(? :\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])
*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[
\["()<>@,;:\\".\[\]]))|\[( [^\[\]\r\\ ]|\\.)*\]( ?:(?:\r\n) ?[ \t])*
))*|(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(
?=[\["()<>@,;:\\".\[\]]))| "(?:[^\"\r \\]|\\.|(? :(?:\r\n)? [ \t]))*
"(?:(?:\r\n)?[ \t])*)*\<(?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\".
\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\
]]))|\[([^\[\]\r\\]|\\.)*\ ](?:(?:\r\ n)?[ \t])*)(?:\.(?:(?:\r\n)?
[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\
Z|(?=[\["()<>@,;:\\".\[\]] ))|\[([^\[ \]\r\\]|\\ .)*\](?:(? :\r\n)?[
\t])*))*(?:,@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F
]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\]
\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()
<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@
,;:\\".\[\]]))|\[([^\[\]\r \\]|\\.)*\ ](?:(?:\r\ n)?[ \t])*))*)*:(?
:(?:\r\n)?[ \t])*)?(?:[^()<>@,;:\\".\[ \] \x00-\x1F]+(?:(?:(?:\r\
n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| "(?:[^\"\r \\]|\\.|(? :(?
:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[
^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()
<>@,;:\\".\[\]]))|"(?:[^\" \r\\]|\\.| (?:(?:\r\n )?[ \t]))*"(?:(?:\
r\n)?[ \t])*))*@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x
1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[
\]\r\\]|\\.)*\](?:(?:\r\n) ?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^
()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<
>@,;:\\".\[\]]))|\[([^\[\] \r\\]|\\.) *\](?:(?:\ r\n)?[ \t])*))*\>(
?:(?:\r\n)?[ \t])*)|(?:[^()<>@,;:\\".\[ \] \x00-\x1F]+(?:(?:(?:\r
\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| "(?:[^\"\r \\]|\\.|(? :(
?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)*:(?:(?:\r\n)?[ \t])*(?:(?:
(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\
["()<>@,;:\\".\[\]]))|"(?: [^\"\r\\]| \\.|(?:(?: \r\n)?[ \t]))*"(?:
(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \
x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| "
(?:[^\"\r\\]|\\.|(?:(?:\r\ n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*@(?:
(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\]\ r\\]|\\.)* \](?:(
?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x
00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[
([^\[\]\r\\]|\\.)*\](?:(?: \r\n)?[ \t])*))*|(?:[^()<>@,;:\\". \[\]
\x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))
|"(?:[^\"\r\\]|\\.|(?:(?:\ r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)*\<(
?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\". \[\] \x00-\x1F]+(?:(?:(?:
\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\]\ r\\]|\\.)*
\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\
[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]
]))|\[([^\[\]\r\\]|\\.)*\] (?:(?:\r\n )?[ \t])*))*(?:,@(?:(?:\r\n)
?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|
\Z|(?=[\["()<>@,;:\\".\[\] ]))|\[([^\ [\]\r\\]|\ \.)*\](?:( ?:\r\n)?
[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]
+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\]\
r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*)*:(?:(?:\r\n)?[ \t])*)?(?:[^(
)<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>
@,;:\\".\[\]]))|"(?:[^\"\r \\]|\\.|(? :(?:\r\n)? [ \t]))*"(?:(?:\r\
n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x
1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| "(?:[^\
"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*@(?:(?:\r\
n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])
+|\Z|(?=[\["()<>@,;:\\".\[ \]]))|\[([ ^\[\]\r\\] |\\.)*\](? :(?:\r\n
)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1
F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\
]\r\\]|\\.)*\](?:(?:\r\n)? [ \t])*))*\>(?:(?:\r\n)?[ \t])*)(?:,\s
*(?:(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(
?=[\["()<>@,;:\\".\[\]]))| "(?:[^\"\r \\]|\\.|(? :(?:\r\n)? [ \t]))*
"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[
\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]
))|"(?:[^\"\r\\]|\\.|(?:(? :\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*
@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r
\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\]\ r\\]|\\.)* \]
(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\
] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]])
)|\[([^\[\]\r\\]|\\.)*\](? :(?:\r\n)? [ \t])*))*|(?:[^()<>@,;:\\".
\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\
]]))|"(?:[^\"\r\\]|\\.|(?: (?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)
*\<(?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\". \[\] \x00-\x1F]+(?:(?
:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\]\ r\\]|\
\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\
\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["()<>@,;:\\ ".
\[\]]))|\[([^\[\]\r\\]|\\. )*\](?:(?: \r\n)?[ \t])*))*(?:,@(?:(?:\
r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t
])+|\Z|(?=[\["()<>@,;:\\". \[\]]))|\[ ([^\[\]\r\ \]|\\.)*\] (?:(?:\r
\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\
x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\
[\]\r\\]|\\.)*\](?:(?:\r\n )?[ \t])*))*)*:(?:(?:\r\n)?[ \t])*)?(?
:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [ \t])+|\Z|(?=[\["
()<>@,;:\\".\[\]]))|"(?:[^ \"\r\\]|\\ .|(?:(?:\r \n)?[ \t]))*"(?:(?
:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x0
0-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| "(?
:[^\"\r\\]|\\.|(?:(?:\r\n) ?[ \t]))*"(?:(?:\r\n)?[ \t])*))*@(?:(?
:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)? [
\t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([^\[\]\ r\\]|\\.)* \](?:(?:
\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00
-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\ ".\[\]]))| \[([
^\[\]\r\\]|\\.)*\](?:(?:\r \n)?[ \t])*))*\>(?:(?:\r\n)?[ \t])*))*
)?;\s*)
See also:
[ http://sleeksoft.co.uk/public/techblog/articles/20050121_3.html ]
(A blog/site owned by a previous colleague of mine)
BFN,
fp.
In a word, OUCH!
[ http://regular-expressions.com/email.html ]
(?:(?:\r\n)?[ \t])*(?:(?:(?:[^()<>@,;:\\
(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t]
)*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
[\["()<>@,;:\\".\[\]]))|"(
?:(?:\r\n)?[ \t])*))*@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
x00-\x1F]+(?:(?:(?:\r\n)?[
[([^\[\]\r\\]|\\.)*\](?:(?
*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
\["()<>@,;:\\".\[\]]))|\[(
))*|(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
?=[\["()<>@,;:\\".\[\]]))|
"(?:(?:\r\n)?[ \t])*)*\<(?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\".
\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
]]))|\[([^\[\]\r\\]|\\.)*\
[ \t])*(?:[^()<>@,;:\\".\[\]
Z|(?=[\["()<>@,;:\\".\[\]]
\t])*))*(?:,@(?:(?:\r\n)?[
]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
\r\\]|\\.)*\](?:(?:\r\n)?[
<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
,;:\\".\[\]]))|\[([^\[\]\r
:(?:\r\n)?[ \t])*)?(?:[^()<>@,;:\\".\[
n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[
^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
<>@,;:\\".\[\]]))|"(?:[^\"
r\n)?[ \t])*))*@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
\]\r\\]|\\.)*\](?:(?:\r\n)
()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
>@,;:\\".\[\]]))|\[([^\[\]
?:(?:\r\n)?[ \t])*)|(?:[^()<>@,;:\\".\[
\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)*:(?:(?:\r\n)?[ \t])*(?:(?:
(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
["()<>@,;:\\".\[\]]))|"(?:
(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
x00-\x1F]+(?:(?:(?:\r\n)?[
(?:[^\"\r\\]|\\.|(?:(?:\r\
(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
[ \t])+|\Z|(?=[\["()<>@,;:\\
?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
([^\[\]\r\\]|\\.)*\](?:(?:
\x00-\x1F]+(?:(?:(?:\r\n)?
|"(?:[^\"\r\\]|\\.|(?:(?:\
?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\".
\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\
[\] \x00-\x1F]+(?:(?:(?:\r\n)?
]))|\[([^\[\]\r\\]|\\.)*\]
?[ \t])*(?:[^()<>@,;:\\".\[\]
\Z|(?=[\["()<>@,;:\\".\[\]
[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*)*:(?:(?:\r\n)?[ \t])*)?(?:[^(
)<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
@,;:\\".\[\]]))|"(?:[^\"\r
n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*@(?:(?:\r\
n)?[ \t])*(?:[^()<>@,;:\\".\[\]
+|\Z|(?=[\["()<>@,;:\\".\[
)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
]\r\\]|\\.)*\](?:(?:\r\n)?
*(?:(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
?=[\["()<>@,;:\\".\[\]]))|
"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[
\] \x00-\x1F]+(?:(?:(?:\r\n)?
))|"(?:[^\"\r\\]|\\.|(?:(?
@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\
] \x00-\x1F]+(?:(?:(?:\r\n)?
)|\[([^\[\]\r\\]|\\.)*\](?
\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
]]))|"(?:[^\"\r\\]|\\.|(?:
*\<(?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\".
:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\
\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
\[\]]))|\[([^\[\]\r\\]|\\.
r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
])+|\Z|(?=[\["()<>@,;:\\".
\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
[\]\r\\]|\\.)*\](?:(?:\r\n
:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
()<>@,;:\\".\[\]]))|"(?:[^
:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
0-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
:[^\"\r\\]|\\.|(?:(?:\r\n)
:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
\t])+|\Z|(?=[\["()<>@,;:\\
\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\]
-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\
^\[\]\r\\]|\\.)*\](?:(?:\r
)?;\s*)
See also:
[ http://sleeksoft.co.uk/public/techblog/articles/20050121_3.html ]
(A blog/site owned by a previous colleague of mine)
BFN,
fp.
Well, yes, "quad" is Latin for 'four'.
I just didn't understand what you meant by "quadropost".
I just didn't understand what you meant by "quadropost".
whoever wrote that,
he has too much free time :P
[And whomever just read it...? ;)]
More to digest, Patrick:
[ http://www.cambiaresearch.com/cambia3/snippets/csharp/regex/uri_regex.aspx ]
// URI-splitting pattern. Named groups, in order of appearance:
//   s0 = text before the first ":"   (s1 includes the ":")
//   a0 = text after "//" up to the next "/", "?" or "#"   (a1 includes the "//")
//   p0 = text up to the first "?" or "#"
//   q0 = text after "?" up to "#"    (q1 includes the "?")
//   f0 = text after "#"              (f1 includes the "#")
// Every part except p0 is optional; p0 may match the empty string.
regexPattern = @"^(?<s1>(?<s0>[^:/\?#]+):)?(?<a1>"
    + @"//(?<a0>[^/\?#]*))?(?<p0>[^\?#]*)"
    + @"(?<q1>\?(?<q0>[^#]*))?"
    + @"(?<f1>#(?<f0>.*))?";
[ http://www.cambiaresearch.com/cambia3/snippets/csharp/regex/uri_regex.aspx ]
// URI-splitting pattern. Named groups, in order of appearance:
//   s0 = text before the first ":"   (s1 includes the ":")
//   a0 = text after "//" up to the next "/", "?" or "#"   (a1 includes the "//")
//   p0 = text up to the first "?" or "#"
//   q0 = text after "?" up to "#"    (q1 includes the "?")
//   f0 = text after "#"              (f1 includes the "#")
// Every part except p0 is optional; p0 may match the empty string.
regexPattern = @"^(?<s1>(?<s0>[^:/\?#]+):)?(?<a1>"
    + @"//(?<a0>[^/\?#]*))?(?<p0>[^\?#]*)"
    + @"(?<q1>\?(?<q0>[^#]*))?"
    + @"(?<f1>#(?<f0>.*))?";
And this may have some relevance - note the comment about URLs without domains; those that are relative!
[ http://www.cflib.org/udf.cfm?ID=494 ]
===
<cfscript>
/**
 * Returns the host from a specified URL.
 * RE fix for MX, thanks to Tom Lane
 *
 * Returns an empty string when the URL is relative (starts with "." or "/"),
 * when no host can be matched, or when the input looks like a bare file name
 * (no "/", "@" or ":" anywhere and fewer than two dots in the matched text).
 *
 * @param this_url   URL to parse. (Required)
 * @return Returns a string.
 * @author Shawn Seley (shawnse@aol.com)
 * @version 2, August 23, 2002
 */
function GetHostFromURL(this_url) {
    var first_char = "";
    var re_found_struct = "";
    var num_expressions = 0;
    var num_dots = 0;
    var this_host = "";

    this_url = trim(this_url);

    first_char = Left(this_url, 1);
    if (Find(first_char, "./")) {
        // relative URL = no host (ex: "../dir1/filename.html" or "/dir1/filename.html")
        return "";
    } else if (Find("://", this_url)) {
        // absolute URL (ex: "ftp://user:pass@ftp.host.com")
        // Search starts just past the "://". The first alternative skips
        // "user:pass@" and captures the host; the second captures the host
        // when there is no "@". "##" is the CFML escape for a literal "#".
        re_found_struct = REFind("[^@]*@([^/:\?##]+)|([^/:\?##]+)", this_url, Find("://", this_url) + 3, "True");
    } else {
        // abbreviated URL (ex: "user:pass@ftp.host.com")
        re_found_struct = REFind("[^@]*@([^/:\?##]+)|([^/:\?##]+)", this_url, 1, "True");
    }

    if (re_found_struct.pos[1] GT 0) {
        // Use the last subexpression; if it did not take part in the match
        // (pos is 0), fall back to the one before it.
        num_expressions = ArrayLen(re_found_struct.pos);
        if (re_found_struct.pos[num_expressions] is 0) num_expressions = num_expressions - 1;
        this_host = Mid(this_url, re_found_struct.pos[num_expressions], re_found_struct.len[num_expressions]);

        // Count the dots in the candidate host.
        num_dots = (Len(this_host) - Len(Replace(this_host, ".", "", "ALL")));
        if ((not FindOneOf("/@:", this_url)) and (num_dots LT 2)) {
            // since this URL doesn't contain any "/" or "@" or ":" characters and since the "host" has fewer than two dots (".")
            // then it is probably actually a file name
            return "";
        }
        return this_host;
    } else {
        return "";
    }
}
</cfscript>
===
Also note the originating page where I found a link to the above code:
[ http://www.houseoffusion.com/cf_lists/message.cfm/forumid:4/messageid:126884 ]
"I found the GetHostFromURL() function on CFLib. The function seems to
work pretty well, it even strips port numbers etc. The problem with that
function is that it will strip anything before the first "." if there is
anything. So, http://www.something.com is returned as something.com .
However I need the hostname as well. I would modify the existing
function, however I am not too keen on RegEx atm.
...chris.alvarado
[ application developer ]
4 Guys Interactive, Inc.
..."
[ http://www.cflib.org/udf.cfm?ID=494 ]
===
<cfscript>
/**
 * Returns the host from a specified URL.
 * RE fix for MX, thanks to Tom Lane
 *
 * Returns an empty string when the URL is relative (starts with "." or "/"),
 * when no host can be matched, or when the input looks like a bare file name
 * (no "/", "@" or ":" anywhere and fewer than two dots in the matched text).
 *
 * @param this_url   URL to parse. (Required)
 * @return Returns a string.
 * @author Shawn Seley (shawnse@aol.com)
 * @version 2, August 23, 2002
 */
function GetHostFromURL(this_url) {
    var first_char = "";
    var re_found_struct = "";
    var num_expressions = 0;
    var num_dots = 0;
    var this_host = "";

    this_url = trim(this_url);

    first_char = Left(this_url, 1);
    if (Find(first_char, "./")) {
        // relative URL = no host (ex: "../dir1/filename.html" or "/dir1/filename.html")
        return "";
    } else if (Find("://", this_url)) {
        // absolute URL (ex: "ftp://user:pass@ftp.host.com")
        // Search starts just past the "://". The first alternative skips
        // "user:pass@" and captures the host; the second captures the host
        // when there is no "@". "##" is the CFML escape for a literal "#".
        re_found_struct = REFind("[^@]*@([^/:\?##]+)|([^/:\?##]+)", this_url, Find("://", this_url) + 3, "True");
    } else {
        // abbreviated URL (ex: "user:pass@ftp.host.com")
        re_found_struct = REFind("[^@]*@([^/:\?##]+)|([^/:\?##]+)", this_url, 1, "True");
    }

    if (re_found_struct.pos[1] GT 0) {
        // Use the last subexpression; if it did not take part in the match
        // (pos is 0), fall back to the one before it.
        num_expressions = ArrayLen(re_found_struct.pos);
        if (re_found_struct.pos[num_expressions] is 0) num_expressions = num_expressions - 1;
        this_host = Mid(this_url, re_found_struct.pos[num_expressions], re_found_struct.len[num_expressions]);

        // Count the dots in the candidate host.
        num_dots = (Len(this_host) - Len(Replace(this_host, ".", "", "ALL")));
        if ((not FindOneOf("/@:", this_url)) and (num_dots LT 2)) {
            // since this URL doesn't contain any "/" or "@" or ":" characters and since the "host" has fewer than two dots (".")
            // then it is probably actually a file name
            return "";
        }
        return this_host;
    } else {
        return "";
    }
}
</cfscript>
===
Also note the originating page where I found a link to the above code:
[ http://www.houseoffusion.com/cf_lists/message.cfm/forumid:4/messageid:126884 ]
"I found the GetHostFromURL() function on CFLib. The function seems to
work pretty well, it even strips port numbers etc. The problem with that
function is that it will strip anything before the first "." if there is
anything. So, http://www.something.com is returned as something.com .
However I need the hostname as well. I would modify the existing
function, however I am not too keen on RegEx atm.
...chris.alvarado
[ application developer ]
4 Guys Interactive, Inc.
..."
Nigel,
re: excruciatingly long URL and email address parsing pattern strings
I guess I just re-learned the meaning of "be careful of what you ask for".
Double :)
Patrick
re: excruciatingly long URL and email address parsing pattern strings
I guess I just re-learned the meaning of "be careful of what you ask for".
Double :)
Patrick
I never leave any turn unstoned.
ASKER
just https://www.experts-exchange.com