Link to home
Start Free TrialLog in
Avatar of Hepen
Hepen

asked on

Return Base Domain from URL (String Stuff)

Example:

Dim strURL as string
strURL = "https://www.experts-exchange.com/Programming/Programming_Languages/Visual_Basic/askQuestion.jsp"

Msgbox ReturnBaseDomain(strURL)

How do I make the function ReturnBaseDomain to return only the  https://www.experts-exchange.com part of the url.



SOLUTION
Avatar of Patrick Matthews
Patrick Matthews
Flag of United States of America image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
SOLUTION
Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Avatar of Hepen
Hepen

ASKER

Is there a way to make it so it doesn't return http:// or /

just https://www.experts-exchange.com
Hepen,
> Is there a way to make it so it doesn't return http:// or /

My post does that...

Regards,

Patrick
Avatar of Hepen

ASKER

Only problem is i have to use regex, i was hoping for a string manipulated way. Although I will use your code for another project for my server application.
If you want to use API than there is another way
Avatar of Hepen

ASKER

what is the api way
SOLUTION
Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Did you mean the "UrlGetPartA"/"UrlGetPart" API?

[ http://vbnet.mvps.org/index.html?code/internet/urlgetpart.htm ]

BFN,

fp.
Well my first thought was InternetCrackUrl API
Hepen,
> Only problem is i have to use regex, i was hoping for a string manipulated way.

What's wrong with using RegEx?  BTW, I found a pattern string that does away with the Replace:

SomeVariable = RegExpFind(strURL, "[a-z0-9-]{2,}\.[a-z0-9-]{2,}(\.[a-z0-9-]{2,}){0,}", 1, False)

That will work regardless of whether there are any forward slashes in the URL.

Regards,

Patrick
For example...

Option Explicit

Private Declare Function UrlGetPart _
                     Lib "shlwapi.dll" _
                   Alias "UrlGetPartA" _
                  (ByVal pszIn As String, _
                   ByVal pszOut As String, _
                   ByRef pcchOut As Long, _
                   ByVal dwPart As Long, _
                   ByVal dwFlags As Long) As Long
Private Function strGet_URL_Part(ByVal strURL As String, _
                                 ByVal lngPart As Long) As String

  Dim lngSize                                           As Long
  Dim strPart                                           As String
  Dim strReturn                                         As String
 
  On Error GoTo Err_strGet_URL_Part
 
  Const lngMAX_PATH                                     As Long = 260&
  Const lngURL_PARTFLAG_KEEPSCHEME                      As Long = &H1

  strReturn = ""
 
  If Len(strURL) > 0 Then
     strPart = Space$(lngMAX_PATH)
     lngSize = Len(strPart)
     
     If UrlGetPart(strURL, _
                   strPart, _
                   lngSize, _
                   lngPart, _
                   lngURL_PARTFLAG_KEEPSCHEME) = 0& Then
        strReturn = Left$(strPart, lngSize)
     End If
  End If

Exit_strGet_URL_Part:

  On Error Resume Next
 
  strGet_URL_Part = strReturn
 
Err_strGet_URL_Part:

  On Error Resume Next
 
  strReturn = ""
 
  Resume Exit_strGet_URL_Part

End Function
Public Sub Test_GetURLPart()

  Dim strURL                                            As String
 
  On Error Resume Next
 
' Const lngURL_PART_SCHEME                              As Long = 1&
  Const lngURL_PART_HOSTNAME                            As Long = 2&
' Const lngURL_PART_USERNAME                            As Long = 3&
' Const lngURL_PART_PASSWORD                            As Long = 4&
' Const lngURL_PART_PORT                                As Long = 5&
' Const lngURL_PART_QUERY                               As Long = 6&
 
  strURL = "https://www.experts-exchange.com/Programming/Programming_Languages/Visual_Basic/askQuestion.jsp"
 
  MsgBox Replace(strGet_URL_Part(strURL, lngURL_PART_HOSTNAME), Left$(strURL & "/", InStr(strURL, "/") - 1), ""), _
         vbInformation Or vbOKOnly
 
End Sub



BFN,

fp.
PS. egl1044's API suggestion:

[ http://msdn.microsoft.com/library/en-us/wininet/wininet/internetcrackurl.asp ]

And sample class file showing code for usage of...
InternetCanonicalizeUrl
InternetCrackUrl
InternetCreateUrl

[ http://vb.mvps.org/samples/project.asp?id=NetUrl ]

' *************************************************************************
'  Copyright ©1998-2000 Karl E. Peterson
'  All Rights Reserved, http://www.mvps.org/vb
' *************************************************************************
'  You are free to use this code within your own applications, but you
'  are expressly forbidden from selling or otherwise distributing this
'  source code, non-compiled, without prior written consent.
' *************************************************************************
Option Explicit

'=====================================================
'=========== INTERNet URL DECLARATIONS ===============
'=====================================================
Private Declare Function InternetCrackUrl Lib "wininet.dll" Alias "InternetCrackUrlA" (ByVal lpszUrl As String, ByVal dwUrlLength As Long, ByVal dwFlags As Long, lpUrlComponents As URL_COMPONENTS) As Long
Private Declare Function InternetCanonicalizeUrl Lib "wininet.dll" Alias "InternetCanonicalizeUrlA" (ByVal lpszUrl As String, ByVal lpszBuffer As String, lpdwBufferLength As Long, ByVal dwFlags As Long) As Long
Private Declare Function InternetCreateUrl Lib "wininet.dll" Alias "InternetCreateUrlA" (lpUrlComponents As URL_COMPONENTS, ByVal dwFlags As Long, ByVal lpszUrl As String, lpdwUrlLength As Long) As Long

Private Type URL_COMPONENTS       'typedef struct {
   StructSize As Long            '    DWORD dwStructSize;
   Scheme As String              '    LPSTR lpszScheme;
   SchemeLength As Long          '    DWORD dwSchemeLength;
   nScheme As Long               '    INTERNET_SCHEME nScheme;
   HostName As String            '    LPSTR lpszHostName;
   HostNameLength As Long        '    DWORD dwHostNameLength;
   nPort As Long                 '    INTERNET_PORT nPort;
   UserName As String            '    LPSTR lpszUserName;
   UserNameLength As Long        '    DWORD dwUserNameLength;
   Password As String            '    LPSTR lpszPassword;
   PasswordLength As Long        '    DWORD dwPasswordLength;
   URLPath As String             '    LPSTR lpszUrlPath;
   UrlPathLength As Long         '    DWORD dwUrlPathLength;
   ExtraInfo As String           '    LPSTR lpszExtraInfo;
   ExtraInfoLength As Long       '    DWORD dwExtraInfoLength;
End Type                         '} URL_COMPONENTS;
'
' Enumerated internet schemes
'
Public Enum InetSchemes
   InternetSchemePartial = -2
   InternetSchemeUnknown = -1
   InternetSchemeDefault = 0
   InternetSchemeFtp
   InternetSchemeGopher
   InternetSchemeHttp
   InternetSchemeHttps
   InternetSchemeFile
   InternetSchemeNews
   InternetSchemeMailto
   InternetSchemeSocks
   InternetSchemeFirst = InternetSchemeFtp
   InternetSchemeLast = InternetSchemeSocks
End Enum

' Flags for InternetCrackUrl() and InternetCreateUrl()
Private Const ICU_ESCAPE = &H80000000      '// (un)escape URL characters
Private Const ICU_USERNAME = &H40000000    '// use internal username & password

' Flags for InternetCanonicalizeUrl() and InternetCombineUrl()
Private Const ICU_NO_ENCODE = &H20000000   '// Don't convert unsafe characters to escape sequence
Private Const ICU_DECODE = &H10000000      '// Convert %XX escape sequences to characters
Private Const ICU_NO_META = &H8000000      '// Don't convert .. etc. meta path sequences
Private Const ICU_ENCODE_SPACES_ONLY = &H4000000  '// Encode spaces only
Private Const ICU_BROWSER_MODE = &H2000000 '// Special encode/decode rules for browser

' Possible error codes
Private Const ERROR_INSUFFICIENT_BUFFER As Long = 122

' Set aside storage for private member variables.
Private m_Url As String
Private m_UrlCmp As URL_COMPONENTS
Private m_UrlCmpMT As URL_COMPONENTS
Private m_LastDllError As Long

' ********************************************
'  Initialize
' ********************************************
Private Sub Class_Initialize()
   ' set size into "empty" structure
   m_UrlCmpMT.StructSize = Len(m_UrlCmpMT)
End Sub

' ********************************************
'  Public Properties
' ********************************************
Public Property Let ExtraInfo(ByVal NewVal As String)
   m_UrlCmp.ExtraInfo = NewVal
   m_UrlCmp.ExtraInfoLength = Len(NewVal)
End Property

Public Property Get ExtraInfo() As String
   ExtraInfo = m_UrlCmp.ExtraInfo
End Property

Public Property Let HostName(ByVal NewVal As String)
   m_UrlCmp.HostName = NewVal
   m_UrlCmp.HostNameLength = Len(NewVal)
End Property

Public Property Get HostName() As String
   HostName = m_UrlCmp.HostName
End Property

Public Property Let Password(ByVal NewVal As String)
   m_UrlCmp.Password = NewVal
   m_UrlCmp.PasswordLength = Len(NewVal)
End Property

Public Property Get Password() As String
   Password = m_UrlCmp.Password
End Property

Public Property Let Port(ByVal NewVal As Long)
   m_UrlCmp.nPort = NewVal
End Property

Public Property Get Port() As Long
   Port = m_UrlCmp.nPort
End Property

Public Property Let Scheme(ByVal NewVal As InetSchemes)
   m_UrlCmp.nScheme = NewVal
End Property

Public Property Get Scheme() As InetSchemes
   Scheme = m_UrlCmp.nScheme
End Property

Public Property Let URLPath(ByVal NewVal As String)
   m_UrlCmp.URLPath = NewVal
   m_UrlCmp.UrlPathLength = Len(NewVal)
End Property

Public Property Get URLPath() As String
   URLPath = m_UrlCmp.URLPath
End Property

Public Property Let UserName(ByVal NewVal As String)
   m_UrlCmp.UserName = NewVal
   m_UrlCmp.UserNameLength = Len(NewVal)
End Property

Public Property Get UserName() As String
   UserName = m_UrlCmp.UserName
End Property

Public Property Let Value(ByVal NewVal As String)
   m_Url = NewVal
   m_UrlCmp = m_UrlCmpMT
   m_LastDllError = 0
   Call CrackUrl(m_Url, m_UrlCmp)
End Property

Public Property Get Value() As String
Attribute Value.VB_UserMemId = 0
   m_LastDllError = 0
   Value = CreateUrl(m_UrlCmp)
End Property

' ********************************************
'  Public Properties // Read-Only
' ********************************************
Public Property Get LastDllError() As Long
   LastDllError = m_LastDllError
End Property
   
Public Property Get SchemeName() As String
   SchemeName = m_UrlCmp.Scheme
End Property

' ********************************************
'  Private Methods
' ********************************************
Private Function CreateUrl(cmp As URL_COMPONENTS) As String
   Dim Buffer As String
   Dim BufLen As Long
   
   ' prepare buffers
   Buffer = String$(1024, 0)
   BufLen = Len(Buffer)
   cmp.StructSize = Len(cmp)
   
   ' try to put it all together
   If InternetCreateUrl(cmp, ICU_ESCAPE, Buffer, BufLen) Then
      CreateUrl = TrimNull(Buffer)
   Else
      If Err.LastDllError = ERROR_INSUFFICIENT_BUFFER Then
         ' bump buffer and try again
         Buffer = String$(BufLen, 0)
         If InternetCreateUrl(cmp, ICU_ESCAPE, Buffer, BufLen) Then
            CreateUrl = TrimNull(Buffer)
         End If
      Else
         m_LastDllError = Err.LastDllError
         Debug.Print "InternetCreateUrl error: " & m_LastDllError
      End If
   End If
End Function

Private Function CrackUrl(ByVal URL As String, cmp As URL_COMPONENTS) As Boolean
   Dim Buffer As String
   Dim BufLen As Long
   Const BufSize = 1024
   '
   ' Try to insure a valid URL to begin with
   '
   Buffer = Space$(BufSize)
   BufLen = Len(Buffer)
   If InternetCanonicalizeUrl(URL, Buffer, BufLen, ICU_BROWSER_MODE) Then
      URL = Left(Buffer, BufLen)
      '
      ' Reset structure and supply buffers
      '
      With cmp
         .StructSize = Len(cmp)
         .Scheme = Space$(BufSize)
         .SchemeLength = BufSize
         .nScheme = InternetSchemeUnknown
         .HostName = Space$(BufSize)
         .HostNameLength = BufSize
         .nPort = 0
         .UserName = Space$(BufSize)
         .UserNameLength = BufSize
         .Password = Space$(BufSize)
         .PasswordLength = BufSize
         .URLPath = Space$(BufSize)
         .UrlPathLength = BufSize
         .ExtraInfo = Space$(BufSize)
         .ExtraInfoLength = BufSize
      End With
      '
      ' Crack URL apart and get what we can from it.
      ' Note: API requires Len(URL) param to include trailing null!
      '
      CrackUrl = CBool(InternetCrackUrl(URL, Len(URL) + 1, ICU_ESCAPE, cmp))
      '
      ' Clean up structure to get rid of crapola
      '
      With cmp
         .Scheme = Left$(.Scheme, .SchemeLength)
         .HostName = Left$(.HostName, .HostNameLength)
         .UserName = Left$(.UserName, .UserNameLength)
         .Password = Left$(.Password, .PasswordLength)
         .URLPath = Left$(.URLPath, .UrlPathLength)
         .ExtraInfo = Left$(.ExtraInfo, .ExtraInfoLength)
      End With
   Else
      CrackUrl = False
   End If
End Function

Private Function TrimNull(ByVal StrIn As String) As String
   Dim nul As Long
   '
   ' Truncate input string at first null.
   ' If no nulls, perform ordinary Trim.
   '
   nul = InStr(StrIn, vbNullChar)
   Select Case nul
      Case Is > 1
         TrimNull = Left(StrIn, nul - 1)
      Case 1
         TrimNull = ""
      Case 0
         TrimNull = Trim(StrIn)
   End Select
End Function


BFN,

fp.
Nigel,

Would you be so kind as to see if I am missing anything in my RegExp pattern string?  It is trying to match:

[two or more characters being any number, letter, or hyphen] + [period] + [two or more characters being any number, letter, or hyphen] +
[a period followed by two or more characters being any number, letter, or hyphen, this entire block repeated zero or more times]

That sounds like a valid answer to me, without getting *really* complicated (I know, for example, that this does not fit the strictest rules
for the top-level domains...).

Regards,

Patrick
Heres the example I came up with


Option Explicit

    Private Type URL_COMPONENTS
       StructSize           As Long
       Scheme               As String
       SchemeLength         As Long
       nScheme              As Long
       HostName             As String
       HostNameLength       As Long
       nPort                As Long
       UserName             As String
       UserNameLength       As Long
       Password             As String
       PasswordLength       As Long
       URLPath              As String
       UrlPathLength        As Long
       ExtraInfo            As String
       ExtraInfoLength      As Long
    End Type
   
    Private Declare Function InternetCrackUrlA Lib "wininet.dll" ( _
        ByVal lpszUrl As String, _
        ByVal dwUrlLength As Long, _
        ByVal dwFlags As Long, _
        lpUrlComponents As URL_COMPONENTS) As Long
   
    Dim uCmp As URL_COMPONENTS
   
Public Function CrackUrl(ByVal url As String) As String
    With uCmp
        .StructSize = Len(uCmp)
        .HostName = Space$(1024)
        .HostNameLength = 1024
        InternetCrackUrlA url, Len(url), &H80000000, uCmp
        CrackUrl = Left$(.HostName, InStr(1, .HostName, Chr$(0)) - 1)
    End With
End Function

Private Sub Command1_Click()
    MsgBox CrackUrl("http://www.google.com/Programming/")
End Sub
Who's Nigel
Hepen,

Will your text *always* start with "http://"?  My RegExp-enabled function will cope with that eventuality just fine,
but I am not sure the API-based methods will...

(Meaning no disrespect at all toward Nigel or egl1044, of course!)

Regards,

Patrick
egl1044,
> Who's Nigel

fanpages = Nigel

:)

Regards,

Patrick
haha ok, well author has 3 ways to do this now so I think he got his answere.
egl1044,
> haha ok, well author has 3 ways to do this now so I think he got his answere.

LOL!

Regards,

Patrick
as far as no "http://" in the string:
string=replace(string, "http://","")
ASKER CERTIFIED SOLUTION
Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Mark,

> why always go for the api or the difficult way?

Your method will work just fine, but *only* if the string we want is *always* bounded by "//" on the left and "/" on
the right.  The Asker gave us one example, not a rule that it would always follow that example.

Regards,

Patrick
Why not just Split()? I don't see any obvious faults with that concept, unless user&pass/port are tagged to the syntax.

Option Explicit
Private Sub Form_Load()
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com/Programming/askQuestion.jsp"))
    Call MsgBox(ReturnBaseDomain("ftp://www.experts-exchange.com/Programming/askQuestion.jsp"))
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com/askQuestion.jsp"))
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com"))
    Call MsgBox(ReturnBaseDomain("https://www.experts-exchange.com/"))
End Sub
Private Function ReturnBaseDomain(ByRef url As String) As String
    Dim s() As String
    s = Split(url, "/")
    If (Right$(s(0), 1) = ":") Then
        ReturnBaseDomain = s(2)
    Else
        ReturnBaseDomain = s(0)
    End If
End Function
The "Split" approach should work just fine as long as you could detect for the presence of "/" or "\" (depending on how the URL was presented/specified in the originating environment; e.g. DOS/Windows or UNIX based).

PS. With regards stripping the optional prefix of "Scheme" ("http://" "ftp://" "https://" "file://" "telnet://" etc)

Note the use of the Replace(...) function in my code demonstrating the strGet_URL_Part(...) method:

MsgBox Replace(strGet_URL_Part(strURL, lngURL_PART_HOSTNAME), Left$(strURL & "/", InStr(strURL, "/") - 1), ""), _
         vbInformation Or vbOKOnly

PPS. Patrick...

"Would you be so kind as to see if I am missing anything in my RegExp pattern string?  It is trying to match:

[two or more characters being any number, letter, or hyphen] + [period] + [two or more characters being any number, letter, or hyphen] +
[a period followed by two or more characters being any number, letter, or hyphen, this entire block repeated zero or more times]

That sounds like a valid answer to me, without getting *really* complicated (I know, for example, that this does not fit the strictest rules for the top-level domains...)."


I have seen a huge 'regexp' string for e-mail validation in my surfin' travels, & I'm sure there was a similar one for URL parsing... I'll see if I can find it again.

BFN,

fp.
PPPS. [!] Mark_FreeSoftware:

Private Function TrimURL(ByVal str As String) As String
Dim n As Long
   n = InStr(str, "//")
   If n > 0 Then str = Right$(str, Len(str) - n - 1)
   n = InStr(str, "/")
   If n > 0 Then str = Left$(str, n - 1)

  TrimURL = str ' *** ADDED :)

End Function


BFN,

fp.
Hi again Patrick,

Here's a few 'regexp' examples for URL parsing:

[ http://textsnippets.com/posts/show/523 ]
"^((http|https):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+\.[^#?\s]+)(.*?)(#[\w\-]+)?$"

(Obviously need to add more "protocol scheme" prefixes to that, e.g. "mailto", "ftp", "ftps", "file", "telnet", "ldap", "news", "gopher", "data", "finger", "local", "local-cgi", "about", and any more I've missed!)

(More searching...)
Comprehensive list here:
[ http://www.iana.org/assignments/uri-schemes.html ]

Further reading here:
[ http://esw.w3.org/topic/UriSchemes ]
[ http://www.faqs.org/rfcs/rfc1738.html ]


PS. Anybody good with converting Python scripts to VB(A)?
[ http://docs.python.org/lib/module-urlparse.html ]
and
[ http://mail.python.org/pipermail/doc-sig/2001-March/001456.html ]
r'\b((?:http|ftp|https|mailto)://[\w@&#-_.!~*();]+\b/?)'


BFN,

fp.
Thanks for asking & subsequently closing the question, Hepen.

Hopefully all the options should prove to be a valuable resource for future question askers/Experts alike :)

BFN,

fp.

lol fanpages, that's like a quadropost ;)

matthewspatrick,

> Your method will work just fine, but *only* if the string we want is *always* bounded by "//" on the left and "/" on
> the right.  The Asker gave us one example, not a rule that it would always follow that example.


no, it works fine without
try it !

(i did, with these urls:
http://www.test.com
http://www.test.com/test
www.test.com
www.test.com/test)

and they all worked
Hi Mark,

Thanks... erm, I think.  "quadropost"?  A phrase I'm not familiar with :(

Is that German or another great divide of the language used by both UK & US cousins?

BFN,

fp.

hehe it's not german, it's....
err i don't know


quadro is italian i think it means 4


and no i'm not an italian ;)
PS. The e-mail validation "regexp" I was referring to:

In a word, OUCH!

[ http://regular-expressions.com/email.html ]

(?:(?:\r\n)?[ \t])*(?:(?:(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:
(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.
|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t]
)*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=
[\["()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(
?:(?:\r\n)?[ \t])*))*@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \
x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\
[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])
*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[
\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*
))*|(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(
?=[\["()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*
"(?:(?:\r\n)?[ \t])*)*\<(?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\".
\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\
]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?
[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\
Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[
 \t])*))*(?:,@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F
]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]
\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()
<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@
,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*)*:(?
:(?:\r\n)?[ \t])*)?(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\
n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?
:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[
^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()
<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\
r\n)?[ \t])*))*@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x
1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[
\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^
()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<
>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*\>(
?:(?:\r\n)?[ \t])*)|(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r
\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(
?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)*:(?:(?:\r\n)?[ \t])*(?:(?:
(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\
["()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:
(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \
x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|"
(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*@(?:
(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?
[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(
?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x
00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[
([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*|(?:[^()<>@,;:\\".\[\]
 \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))
|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)*\<(
?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:
\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*
\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\
[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]
]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*(?:,@(?:(?:\r\n)
?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|
\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?
[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]
+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\
r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*)*:(?:(?:\r\n)?[ \t])*)?(?:[^(
)<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>
@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\
n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x
1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|"(?:[^\
"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*@(?:(?:\r\
n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])
+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n
)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1
F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\
]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*\>(?:(?:\r\n)?[ \t])*)(?:,\s
*(?:(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(
?=[\["()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*
"(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[
\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]
))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*
@(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r
\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\]
(?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\
] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]])
)|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*|(?:[^()<>@,;:\\".
\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\
]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*)
*\<(?:(?:\r\n)?[ \t])*(?:@(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?
:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\
\.)*\](?:(?:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\
\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".
\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*(?:,@(?:(?:\
r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t
])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:\r
\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\
x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\
[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*)*:(?:(?:\r\n)?[ \t])*)?(?
:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["
()<>@,;:\\".\[\]]))|"(?:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?
:\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x0
0-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|"(?
:[^\"\r\\]|\\.|(?:(?:\r\n)?[ \t]))*"(?:(?:\r\n)?[ \t])*))*@(?:(?
:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00-\x1F]+(?:(?:(?:\r\n)?[
\t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([^\[\]\r\\]|\\.)*\](?:(?:
\r\n)?[ \t])*)(?:\.(?:(?:\r\n)?[ \t])*(?:[^()<>@,;:\\".\[\] \x00
-\x1F]+(?:(?:(?:\r\n)?[ \t])+|\Z|(?=[\["()<>@,;:\\".\[\]]))|\[([
^\[\]\r\\]|\\.)*\](?:(?:\r\n)?[ \t])*))*\>(?:(?:\r\n)?[ \t])*))*
)?;\s*)

See also:
[ http://sleeksoft.co.uk/public/techblog/articles/20050121_3.html ]
(A blog/site owned by a previous colleague of mine)

BFN,

fp.
Well, yes, "quad" is Latin for 'four'.

I just didn't understand what you meant by "quadropost".

whoever wrote that,

he has too much free time :P
[And whomever just read it...? ;)]
More to digest, Patrick:

[ http://www.cambiaresearch.com/cambia3/snippets/csharp/regex/uri_regex.aspx ]

regexPattern = @"^(?<s1>(?<s0>[^:/\?#]+):)?(?<a1>"
      + @"//(?<a0>[^/\?#]*))?(?<p0>[^\?#]*)"
      + @"(?<q1>\?(?<q0>[^#]*))?"
      + @"(?<f1>#(?<f0>.*))?";
And this may have some relevance - note the comment about URLs without domains; those that are relative!

[ http://www.cflib.org/udf.cfm?ID=494 ]

===
<cfscript>
/**
 * Returns the host from a specified URL.
 * RE fix for MX, thanks to Tom Lane
 *
 * @param this_url        URL to parse. (Required)
 * @return Returns a string.
 * @author Shawn Seley (shawnse@aol.com)
 * @version 2, August 23, 2002
 */
function GetHostFromURL(this_url) {
      var first_char       = "";
      var re_found_struct  = "";
      var num_expressions  = 0;
      var num_dots         = 0;
      var this_host        = "";
      
      this_url = trim(this_url);
      
      first_char = Left(this_url, 1);
      if (Find(first_char, "./")) {
            return "";   // relative URL = no host   (ex: "../dir1/filename.html" or "/dir1/filename.html")
      } else if(Find("://", this_url)){
            // absolute URL    (ex: "pass@ftp.host.com">ftp://user:pass@ftp.host.com")
            re_found_struct = REFind("[^@]*@([^/:\?##]+)|([^/:\?##]+)", this_url, Find("://", this_url)+3, "True");
      } else {
            // abbreviated URL (ex: "user:pass@ftp.host.com")
            re_found_struct = REFind("[^@]*@([^/:\?##]+)|([^/:\?##]+)", this_url, 1, "True");
      }
      
      if (re_found_struct.pos[1] GT 0) {
            num_expressions = ArrayLen(re_found_struct.pos);
                if(re_found_struct.pos[num_expressions] is 0) num_expressions = num_expressions - 1;
            this_host = Mid(this_url, re_found_struct.pos[num_expressions], re_found_struct.len[num_expressions]);
            num_dots = (Len(this_host) - Len(Replace(this_host, ".", "", "ALL")));;
            if ((not FindOneOf("/@:", this_url)) and (num_dots LT 2)){
                  // since this URL doesn't contain any "/" or "@" or ":" characters and since the "host" has fewer than two dots (".")
                  // then it is probably actually a file name
                  return "";
            }
            return this_host;
      } else {
            return "";
      }
}
</cfscript>
===


Also note the originating page where I found a link to the above code:

[ http://www.houseoffusion.com/cf_lists/message.cfm/forumid:4/messageid:126884 ]

"I found the GetHostFromURL() function on CFLib. The function seems to
work pretty well, it even strips port numbers etc. The problem with that
function is that it will strip anything before the first "." if there is
anything. So, http://www.something.com is returned as something.com .
However I need the hostname as well. I would modify the existing
function, however I am not too keen on RegEx atm.

...chris.alvarado
[ application developer ]
4 Guys Interactive, Inc.
..."
Nigel,

re: excruciatingly long URL and email address parsing pattern strings

I guess I just re-learned the meaning of "be careful of what you ask for".

Double :)

Patrick
I never leave any turn unstoned.