Regular expression componenent for delphi 5

does anyone know a free object/component for Regular exporession for delphi 5?

thanx
T.
LVL 1
tomer_engelAsked:
Who is Participating?
I wear a lot of hats...

"The solutions and answers provided on Experts Exchange have been extremely helpful to me over the last few years. I wear a lot of hats - Developer, Database Administrator, Help Desk, etc., so I know a lot of things but not a lot about one thing. Experts Exchange gives me answers from people who do know a lot about one thing, in a easy to use platform." -Todd S.

DragonSlayerCommented:
Check out TurboPower's SysTool http://sourceforge.net/projects/tpsystools/
0
Lee_NoverCommented:
I'm using TRegExpr - http://RegExpStudio.com
0

Experts Exchange Solution brought to you by

Your issues matter to us.

Facing a tech roadblock? Get the help and guidance you need from experienced professionals who care. Ask your question anytime, anywhere, with no hassle.

Start your 7-day free trial
Russell LibbySoftware Engineer, Advisory Commented:
Something I wrote last year. Its free, its fast, and it was written in Delphi5. If nothing else, it should provide a good (clean) example of a finite state machine (see the MatchSubString method for the actual token execution)

Regards,
Russell

----

unit RegExpr;
////////////////////////////////////////////////////////////////////////////////
//
//   Unit        :  REGEXPR
//   Description :  Regular expression parser for Delphi
//   Date        :  10.14.2003
//
//   Description :
//
//   ^        =  Start anchor indicator (if used in first position)
//   $        =  End anchor indicator (must be final character)
//   ( | )    =  Expression OR Expression. Parens start the ORing of two or more
//               sub expressions. For multiple ORing the following syntax can be
//               used: (expr1|expr2|expr3|...) Cannot have post modifiers
//   ^        =  Must be used after [, negates a class
//   .        =  Match any char (cannot be negated, and cannot have post modifiers)
//   *        =  Post modifier, match zero or more occurrences of char/char class
//   ?        =  Post modifier, match zero or one occurences of char/char class
//   +        =  Post modifier, match one or more occurences of char/char class
//   {}       =  Post modifier, allows for the patterns {m}, {m,} and {m, n} where
//               {m} matches exactly m occurrences of the char/char class
//               {m,} matches m occurences or more of the char/char class
//               {m, n} matches m to n occurences of the char / char class
//   []       =  Char class, cannot contain ".", pre/post modifiers, or other meta
//               characters except for "-".
//   -        =  Used to define char ranges in char class
//   @0..9    =  Mark the current bead position (integer value). Useful for seperating
//               sub parts of the expression
//   \        =  Escaped char: The following have special meaning
//               \a    =  Bell
//               \b    =  Backspace
//               \f    =  Formfeed
//               \n    =  Newline
//               \r    =  Carriage return
//               \t    =  Tab
//               \s    =  Space
//               \v    =  Vertical tab
//               All other escaped charactes are translated as literals. This allows
//               the use of what would otherwise be meta characters. (ie: *, ., [, ...)
//   %        =  Extended char class range. Must be trailed by one of the following:
//               %a    =  0-9, A-Z, a-z
//               %c    =  A-Z, a-z
//               %d    =  0-9
//               %h    =  0-9, A-F, a-f
//               %i    =  \_, \-, \$, 0-9, A-Z, a-z
//               %n    =  0-9
//               %w    =  \n, \r, \s, \t
//               Any other trailer will cause an expression error.
//   x        =  All other characters are taken as literal values.
//
////////////////////////////////////////////////////////////////////////////////
interface

{$Z4}
uses
  Windows, SysUtils, Classes;

////////////////////////////////////////////////////////////////////////////////
//   Regular expression exception class
////////////////////////////////////////////////////////////////////////////////
type
  ERegExpr          =  class(Exception);

////////////////////////////////////////////////////////////////////////////////
//   Regular expression data types
////////////////////////////////////////////////////////////////////////////////
type
  PCharSet          =  ^TCharSet;
  TCharSet          =  Set of Char;

////////////////////////////////////////////////////////////////////////////////
//   Regular expression instruction types
////////////////////////////////////////////////////////////////////////////////
type
  TRegType          =  (rtTerminal, rtBead, rtMark, rtMarkOr, rtMarkEnd, rtAnyChar,
                        rtChar, rtClass, rtNegClass);

////////////////////////////////////////////////////////////////////////////////
//   Regular expression post modifier types
////////////////////////////////////////////////////////////////////////////////
type
  TRegPost          =  (rpNone, rpOne, rpZMore, rpZOne, rpOneMore, rpExact, rpCMore, rpCtoC);

////////////////////////////////////////////////////////////////////////////////
//   Regular expression state structure
////////////////////////////////////////////////////////////////////////////////
type
  PRegState         =  ^TRegState;
  TRegState         =  packed record
     regType:       TRegType;
     regPost:       TRegPost;
     dwMin:         Integer;
     dwMax:         Integer;
     case Integer of
        0  :  (lpClass:       PCharSet);
        1  :  (cChar:         Char);
        2  :  (dwJump:        Integer);
  end;

////////////////////////////////////////////////////////////////////////////////
//   Regular expression constants
////////////////////////////////////////////////////////////////////////////////
const
  REG_NILJUMP       =  0;
  REG_NEGCLASS:     Array [False..True] of TRegType = (rtClass, rtNegClass);
  REG_TERMCHARS:    Set of Char = [#0, '$', '(', ')', '|'];
  REG_METACHARS:    Set of Char = ['$', '(', ')', '|', '^', '*', '?', '+', '-',
                                   '@', '[', ']', '.', '{', '}', ',', '%'];
  REG_PARSEERRORS:  Array [0..15] of PChar =
                    ('Expression pattern has not been set',            // 0
                     'Expression pattern cannot be blank',             // 1
                     'Invalid pattern expression',                     // 2
                     'Unexpected end of expression at position (%d)',  // 3
                     'Unexpected meta character at position (%d)',     // 4
                     'Characters after end anchor at position (%d)',   // 5
                     'Invalid special identifier at position (%d)',    // 6
                     'Invalid empty set at position (%d)',             // 7
                     'Min count must be less than max count (%d)',     // 8
                     'Expected a closing brace at position (%d)',      // 9
                     'Exact match count cannot be set to zero (%d)',   // 10
                     'Expected a numeric at position (%d)',            // 11
                     'Invalid numeric expression at position (%d)',    // 12
                     'Expected a pipe at position (%d)',               // 13
                     'Expected a closing paren at position (%d)',      // 14
                     'Bead markers must be bewteen 0 and 9'            // 15
                    );

////////////////////////////////////////////////////////////////////////////////
//   Regular expression class
////////////////////////////////////////////////////////////////////////////////
type
  TRegExpr          =  class(TObject)
  private
     // Private declarations
     FBeadPos:      Array [0..9] of Integer;
     FAncStart:     Boolean;
     FAncEnd:       Boolean;
     FMatchCase:    Boolean;
     FMatched:      Boolean;
     FMatchPos:     Integer;
     FMatchLen:     Integer;
     FTable:        TList;
     FError:        PChar;
     FPos:          PChar;
     FParsed:       Boolean;
     FPattern:      PChar;
     FSource:       PChar;
     FCursor:       PChar;
     FAnchor:       PChar;
  protected
     // Protected declarations
     function       GetSource: PChar;
     function       GetPattern: PChar;
     function       GetPosition: Integer;
     function       GetBeadPos(Index: Integer): Integer;
     procedure      SetSource(Source: PChar);
     procedure      SetPattern(Pattern: PChar);
     procedure      SetMatchCase(CaseSensitive: Boolean);
     procedure      ClearStateTable;
     function       AllocState(StateType: TRegType): PRegState;
     procedure      FreeState(State: PRegState);
     procedure      ParseSubExpression;
     procedure      ParseParens;
     procedure      ParseExpression;
     procedure      ParseAnyChar;
     procedure      ParseCharClass;
     procedure      ParseChar;
     procedure      ParseCharRange(CClass: PCharSet);
     procedure      ParseSpecialRange(CClass: PCharSet);
     function       ParseCCChar: Char;
     function       ParseEscChar: Char;
     function       ParseNum: Integer;
     procedure      ParsePostModifier(State: PRegState);
     procedure      ResetMatched;
     procedure      ResetAll;
     function       Match(Start: Boolean): Boolean;
     function       MatchSubString: Boolean;
     function       ScanLineStart: Boolean;
     procedure      RaiseError(Error: Integer; InsertPos: Boolean = False);
  public
     // Public declarations
     constructor    Create;
     constructor    CreatePattern(Pattern: PChar);
     destructor     Destroy; override;
     function       Test(Pattern: PChar): Boolean;
     function       MatchFirst: Boolean;
     function       MatchNext: Boolean;
     property       BeadPos[Index: Integer]: Integer read GetBeadPos;
     property       CaseSensitive: Boolean read FMatchCase write SetMatchCase;
     property       MatchPos: Integer read FMatchPos;
     property       MatchLen: Integer read FMatchLen;
     property       Matched: Boolean read FMatched;
     property       Pattern: PChar read GetPattern write SetPattern;
     property       Position: Integer read GetPosition;
     property       Source: PChar read GetSource write SetSource;
  end;

implementation

////////////////////////////////////////////////////////////////////////////////
//   TRegExpr
////////////////////////////////////////////////////////////////////////////////
procedure TRegExpr.RaiseError(Error: Integer; InsertPos: Boolean = False);
begin

  // Check to see if we should be formatting this
  if InsertPos then
     // Formatted
     StrFmt(FError, REG_PARSEERRORS[Error], [Succ(FPos-FPattern)])
  else
     // Straight copy
     StrCopy(FError, REG_PARSEERRORS[Error]);

  // Raise the exception with the detailed information
  raise ERegExpr.Create(FError);

end;

function TRegExpr.GetBeadPos(Index: Integer): Integer;
begin

  // Check index range for bead position
  if not(Index in [0..9]) then RaiseError(15);

  // Return the bead marker
  result:=FBeadPos[Index];

end;

function TRegExpr.Test(Pattern: PChar): Boolean;
begin

  // Set the pattern to test
  SetPattern(Pattern);

  // Call MatchFirst and return results
  result:=MatchFirst;

end;

function TRegExpr.GetPosition: Integer;
begin

  // Return the current anchor position within the source text
  result:=FAnchor-FSource;

end;

function TRegExpr.MatchNext: Boolean;
begin

  // Reset matched state
  ResetMatched;

  // Call Match
  result:=Match(False);

end;

function TRegExpr.MatchFirst: Boolean;
begin

  // Reset matched state and source starting position
  ResetAll;

  // Call Match
  result:=Match(True);

end;

function TRegExpr.Match(Start: Boolean): Boolean;
begin

  // Set default result
  result:=False;

  // Check to make sure the expression was parsed
  if FParsed then
  begin
     // Check anchor start
     if FAncStart then
     begin
        // If not the first time then scan to a new line
        if not(Start) then ScanLineStart;
        // Keep looping until match, or cursor is at the end
        while (FCursor^ > #0) do
        begin
           // Search for the pattern
           result:=MatchSubString;
           if result then break;
           // Increment the anchor
           Inc(FAnchor);
           // Scan to the next line
           if not(ScanLineStart) then break;
        end;
     end
     else
     begin
        // Keep looping until match, or cursor is at the end
        while (FCursor^ > #0) do
        begin
           // Search for the pattern
           result:=MatchSubString;
           if result then break;
           // Increment the anchor
           Inc(FAnchor);
           // Set the cursor to the anchor
           FCursor:=FAnchor;
        end;
     end;
     // If we did not succeed then we must perform a full reset
     if result then
     begin
        // Set match pos and length
        FMatched:=True;
        FMatchPos:=FAnchor-FSource;
        FMatchLen:=FCursor-FAnchor;
        // If match length is zero, which can happen when matching zero times, then
        // we may run into cases where the cursor does not get incremented. We need
        // to increment for those cases
        if ((FMatchLen = 0) and (FCursor^ > #0)) then Inc(FCursor);
        // Set the next starting search point
        FAnchor:=FCursor;
     end
     else
        ResetAll;
  end
  else
     // Not parsed
     RaiseError(0);

end;

function TRegExpr.MatchSubString: Boolean;
var  prsNode:    PRegState;
     ppaTable:   PPointerList;
     dwCharLoop: Integer;
     dwIndex:    Integer;
     dwBranch:   Integer;
     lpszHold:   PChar;
     bMatch:     Boolean;
     cChar:      Char;
begin

  // Set default result (optimisitic)
  result:=True;

  // Set node pointer
  prsNode:=nil;

  // Map in the table as a straight pointer array
  ppaTable:=FTable.List;

  // Clear bead positions
  dwIndex:=10;
  repeat
     Dec(dwIndex);
     FBeadPos[dwIndex]:=0;
  until (dwIndex = 0);

  // Set string index and default jump pointers
  lpszHold:=FCursor;
  dwBranch:=REG_NILJUMP;

  // Loop until we are done
  while True do
  begin
     // Get the state record and make sure the index is incremented
     prsNode:=ppaTable^[dwIndex];
     Inc(dwIndex);
     // Check for terminating state or expression break
     case prsNode^.regType of
        // End of compiled expression
        rtTerminal  :  break;
        // Bead handling
        rtBead      :  FBeadPos[prsNode^.dwJump]:=FCursor-FSource;
        // START MARKER
        rtMark      :
        begin
           // Mark the current string and set the branch point
           dwBranch:=prsNode^.dwJump;
           lpszHold:=FCursor;
        end;
        // OR MARKER
        rtMarkOr    :
        begin
           // Check current result
           if result then
           begin
              // Jump out of here
              dwIndex:=prsNode^.dwJump;
              dwBranch:=REG_NILJUMP;
           end
           else
           begin
              // Reset the cursor and set new jump point
              dwBranch:=prsNode^.dwJump;
              FCursor:=lpszHold;
              // Reset the result
              result:=True;
           end;
        end;
        // END MARKER
        rtMarkEnd   :
        begin
           // Clear the jump points
           dwBranch:=REG_NILJUMP;
           // Check result
           if result then Continue;
           // Failure
           Break;
        end;
     else
        // Simple char state loop counter to handle the multiple matching
        dwCharLoop:=1;
        while True do
        begin
           // Perform the character matching (if we have string)
           if (FCursor^ > #0) then
           begin
              if FMatchCase then
                 cChar:=FCursor^
              else
                 cChar:=UpCase(FCursor^);
              case prsNode^.regType of
                 rtAnyChar   :  bMatch:=not(cChar = #10);
                 rtChar      :  bMatch:=(cChar = prsNode^.cChar);
                 rtClass     :  bMatch:=(cChar in prsNode^.lpClass^);
                 rtNegClass  :  bMatch:=not(cChar in prsNode^.lpClass^);
              else
                 // Set match flag to false
                 bMatch:=False;
              end;
              // Increment the cursor position
              Inc(FCursor, Ord(bMatch));
           end
           else
              // Set match flag to false
              bMatch:=False;
           // Handle the post operator
           case prsNode^.regPost of
              // No modifier, or single match
              rpNone,
              rpOne          :
              begin
                 result:=bMatch;
                 break;
              end;
              // Match zero or more times
              rpZMore        :  if not(bMatch) then break;
              // Match zero or one times
              rpZOne         :  break;
              // Match one of more times
              rpOneMore      :
              begin
                 if (dwCharLoop = 1) then result:=bMatch;
                 if not(bMatch) then break;
              end;
              // Match exactly count times
              rpExact        :
              begin
                 result:=bMatch;
                 if not(bMatch) or (dwCharLoop >= prsNode^.dwMin) then break;
              end;
              // Match at least count or more times
              rpCMore        :
              begin
                 if (dwCharLoop <= prsNode^.dwMin) then result:=bMatch;
                 if not(bMatch) then break;
              end;
              // Match at least min times up to max times
              rpCtoC         :
              begin
                 if (dwCharLoop <= prsNode^.dwMin) then result:=bMatch;
                 if not(bMatch) or (dwCharLoop >= prsNode^.dwMax) then break;
              end;
           end;
           // Increment the char loop counter
           Inc(dwCharLoop);
        end;
        // Check for failure
        if not(result) then
        begin
           // If no jump is set then we fail
           if (dwBranch = REG_NILJUMP) then break;
           // Perform the jump
           dwIndex:=dwBranch;
        end;
     end;
  end;

  // Success, we still need to check against the terminal state in case we
  // ran out of string to process
  if result and Assigned(prsNode) then
  begin
     // Make sure we processed the terminal state
     result:=(prsNode^.regType = rtTerminal);
     // Still need to check against an end anchor if one was specified
     if (result and FAncEnd) then result:=(FCursor^ in [#0, #13, #10]);
  end
  else
     result:=False;

end;

function TRegExpr.ScanLineStart: Boolean;
begin

  // Set default result
  result:=False;

  // Need to position the cursor right after a line break
  while not(FCursor^ in [#0, #10]) do Inc(FCursor);

  // If we are at a null then there is no next line so return false
  if (FCursor^ > #0) then
  begin
     // Increment past the line feed
     Inc(FCursor);
     // Reset the anchor to match the cursor
     FAnchor:=FCursor;
     // Success
     result:=True;
  end;

end;

procedure TRegExpr.ParseSubExpression;
var  prsNode:    PRegState;
begin

  // Parse the sub expression until we hit a terminating char
  while not(FPos^ in REG_TERMCHARS) do
  begin
     // Handle the character
     case FPos^ of
        // Any character
        '.'   :  ParseAnyChar;
        // Character class
        '['   :  ParseCharClass;
        // Parse a bead position
        '@'   :
        begin
           // Skip next
           Inc(FPos);
           // Bead must be bewteen 0..9
           if not(FPos^ in ['0'..'9']) then RaiseError(15);
           // Allocate a new state
           prsNode:=AllocState(rtBead);
           prsNode^.dwJump:=Ord(FPos^)-48;
           FTable.Add(prsNode);
           // Skip next
           Inc(FPos);
        end;
        // Parse an extended char class
        '%'   :
        begin
           // Resource protection
           prsNode:=nil;
           try
              // Allocate a new state
              prsNode:=AllocState(rtClass);
              prsNode^.lpClass:=AllocMem(SizeOf(TCharSet));
              // Parse the range and post modifier
              ParseSpecialRange(prsNode^.lpClass);
              ParsePostModifier(prsNode);
              // Add to the state table
              FTable.Add(prsNode);
           except
              // Free the state
              FreeState(prsNode);
              // Re-raise the exception
              raise;
           end;
        end;
     else
        // It is a failure to be sitting at a meta char at this point
        if (FPos^ in REG_METACHARS) then
           RaiseError(4, True)
        else
           // Parse a literal or escaped character
           ParseChar;
     end;
  end;

end;

procedure TRegExpr.ParseParens;
var  prsMark:    PRegState;
     prsMarkOr:  PRegState;
     prsMarkEnd: PRegState;
     dwMark:     Integer;
begin

  // Skip starting paren
  Inc(FPos);

  // Add the mark record (hold pointer so we can fill the jump)
  prsMark:=AllocState(rtMark);
  dwMark:=FTable.Add(prsMark);

  // Parse the sub expression
  ParseSubExpression;

  // Better be sitting at a | delimiter
  if (FPos^ <> '|') then RaiseError(13, True);
  Inc(FPos);

  // Add the mark OR
  prsMarkOr:=AllocState(rtMarkOr);

  // Add to the table and set the mark jump point
  prsMark^.dwJump:=FTable.Add(prsMarkOr);

  // Switch
  prsMark:=prsMarkOr;

  // Parse the second part of the expression
  ParseSubExpression;

  // Keep iterating while we have an OR
  while (FPos^ = '|') do
  begin
     // Push next
     Inc(FPos);
     // Add the subsequent ORs
     prsMarkOr:=AllocState(rtMarkOr);
     // Add to the table and set the mark jump point
     prsMark^.dwJump:=FTable.Add(prsMarkOr);
     // Switch
     prsMark:=prsMarkOr;
     // Parse the subsequent sub expression
     ParseSubExpression;
  end;

  // End the mark
  if (FPos^ <> ')') then RaiseError(14, True);
  Inc(FPos);

  // Allocate the mark end and set loop jump plus post modifiers
  prsMarkEnd:=AllocState(rtMarkEnd);
  prsMarkEnd^.dwJump:=dwMark;

  // Add the end and update the last jump
  prsMark^.dwJump:=FTable.Add(prsMarkEnd);

end;

procedure TRegExpr.ParseExpression;
var  prsNode:       PRegState;
begin

  // Clear the current state table and reparse
  ClearStateTable;

  // Clear match settings
  ResetMatched;

  // Reset expression parser anchors
  FAncStart:=False;
  FAncEnd:=False;

  // Set parser variable
  FPos:=FPattern;

  // Check for null pattern string
  if (FPos^ = #0) then RaiseError(1);

  // Check for starting anchor
  if (FPos^ = '^') then
  begin
     // Set anchor start flag
     FAncStart:=True;
     Inc(FPos);
     // Check for null
     if (FPos^ = #0) then RaiseError(2);
  end
  else
     // Check for just an ending anchor
     if (FPos^ = '$') then RaiseError(2);

  // Catch exceptions so we can clear the state table on failure
  try
     // Parse until we hit an ending char
     while not(FPos^ in [#0, '$']) do
     begin
        case FPos^ of
           '('   :  ParseParens;
           ')'   :  RaiseError(4, True);
           '|'   :  RaiseError(4, True);
        else
           ParseSubExpression;
        end;
     end;
     // Check for tail end anchor
     if (FPos^ = '$') then
     begin
        FAncEnd:=True;
        // Push next
        Inc(FPos);
        // We must be at the null
        if (FPos^ > #0) then RaiseError(5, True);
     end;
     // Add the terminal state
     prsNode:=AllocState(rtTerminal);
     FTable.Add(prsNode);
  except
     // Clear the anchor flags
     FAncStart:=False;
     FAncEnd:=False;
     // Clear the state table
     ClearStateTable;
     // Re-raise the exception again
     raise;
  end;

end;

procedure TRegExpr.ParseChar;
var  prsNode:    PRegState;
     cChar:      Char;
begin

  // Is the pattern null
  if (FPos^ = #0) then RaiseError(3, True);

  // Check for invalid meta characters
  if (FPos^ in REG_METACHARS) then RaiseError(4, True);

  // Check for escaped character
  if (FPos^ = '\') then
     cChar:=ParseEscChar
  else
     // Literal character
     cChar:=FPos^;

  // Push next
  Inc(FPos);

  // Allocate a new state
  prsNode:=AllocState(rtChar);
  if not(FMatchCase) then
     prsNode^.cChar:=UpCase(cChar)
  else
     prsNode^.cChar:=cChar;

  try
     // Parse out any post modifier
     ParsePostModifier(prsNode);
  except
     // Chance to free the memory
     FreeState(prsNode);
     // Re-raise the exception
     raise;
  end;

  // Add to state table
  FTable.Add(prsNode);

end;

function TRegExpr.ParseEscChar: Char;
begin

  // Push next
  Inc(FPos);

  // Check the next character
  case FPos^ of
     #0    :
     begin
        result:=#0;
        RaiseError(3, True);
     end;
     'a'   :  result:=#7;
     'b'   :  result:=#8;
     'f'   :  result:=#12;
     'n'   :  result:=#10;
     'r'   :  result:=#13;
     't'   :  result:=#9;
     's'   :  result:=#32;
     'v'   :  result:=#11;
  else
     // Escape as is
     result:=FPos^;
  end;

end;

function TRegExpr.ParseCCChar: Char;
begin

  // Is the pattern null
  if (FPos^ = #0) then RaiseError(3, True);

  // Check for invalid meta characters
  if (FPos^ in REG_METACHARS) then RaiseError(4, True);

  // Check for escaped character
  if (FPos^ = '\') then
     result:=ParseEscChar
  else
     // Literal character
     result:=FPos^;

  // Push next
  Inc(FPos);

end;

procedure TRegExpr.ParseSpecialRange(CClass: PCharSet);
var  cChar:      Char;
begin

  // Push next
  Inc(FPos);

  // Is the pattern null
  if (FPos^ = #0) then RaiseError(3, True);

  // Check for invalid meta characters
  if (FPos^ in REG_METACHARS) then RaiseError(4, True);

  // Handle the special char range
  case FPos^ of
     // Alpha, numeric
     'a'   :
     begin
        for cChar:='0' to '9' do Include(CClass^, cChar);
        for cChar:='A' to 'Z' do Include(CClass^, cChar);
        for cChar:='a' to 'z' do Include(CClass^, cChar);
     end;
     // Alpha
     'c'   :
     begin
        for cChar:='A' to 'Z' do Include(CClass^, cChar);
        for cChar:='a' to 'z' do Include(CClass^, cChar);
     end;
     // Numeric
     'd'   :  for cChar:='0' to '9' do Include(CClass^, cChar);
     // Hexadecimal
     'h'   :
     begin
        for cChar:='0' to '9' do Include(CClass^, cChar);
        for cChar:='A' to 'F' do Include(CClass^, cChar);
        for cChar:='a' to 'f' do Include(CClass^, cChar);
     end;
     // Alpha, numeric, underscore, dash, dollar
     'i'   :
     begin
        Include(CClass^, '_');
        Include(CClass^, '-');
        Include(CClass^, '$');
        for cChar:='0' to '9' do Include(CClass^, cChar);
        for cChar:='A' to 'Z' do Include(CClass^, cChar);
        for cChar:='a' to 'z' do Include(CClass^, cChar);
     end;
     // White space
     'w'   :
     begin
        Include(CClass^, #9);
        Include(CClass^, #10);
        Include(CClass^, #13);
        Include(CClass^, #32);
     end;
  else
     // Invalid identifier
     RaiseError(6, True);
  end;

  // Push next
  Inc(FPos);

end;

procedure TRegExpr.ParseCharRange(CClass : PCharSet);
var  cStart:     Char;
     cEnd:       Char;
     cSwap:      Char;
begin

  // Check for special char range
  if (FPos^ = '%') then
     ParseSpecialRange(CClass)
  else
  begin
     // Parse a single character
     cStart:=ParseCCChar;
     // Check for character range
     if (FPos^ = '-') then
     begin
        // Range of characters
        Inc(FPos);
        // Parse the next character
        cEnd:=ParseCCChar;
        // Build the range as a character set
        if (cStart > cEnd) then
        begin
           // Exchange first and last
           cSwap:=cStart;
           cStart:=cEnd;
           cEnd:=cSwap;
        end;
        // Include the characters in the character set
        for cSwap:=cStart to cEnd do
        begin
           Include(CClass^, cSwap);
           if not(FMatchCase) then Include(CClass^, UpCase(cSwap));
        end;
     end
     else
     begin
        // Single character
        if FMatchCase then
           Include(CClass^, cStart)
        else
           Include(CClass^, UpCase(cStart));
     end;
  end;

end;

procedure TRegExpr.ParseCharClass;
var  prsNode:    PRegState;
     bNegate:    Boolean;
begin

  // Push next
  Inc(FPos);

  // Is this a negated class?
  bNegate:=(FPos^ = '^');
  if bNegate then Inc(FPos);

  // Check for an immediate ], which means an empty set (which is not allowed)
  if (FPos^ = ']') then RaiseError(7, True);

  // Allocate a state record and char class
  prsNode:=AllocState(REG_NEGCLASS[bNegate]);
  prsNode^.lpClass:=AllocMem(SizeOf(TCharSet));

  // Start parsing the character ranges until we hit the closing bracket
  try
     while not(FPos^ in [#0, ']']) do ParseCharRange(prsNode^.lpClass);
     // We must have the ending bracket
     if (FPos^ = #0) then RaiseError(3, True);
     // Push next
     Inc(FPos);
     // Parse out any post modifier
     ParsePostModifier(prsNode);
  except
     // Gives us the chance to clean up
     FreeState(prsNode);
     // Re-raise the exception
     raise;
  end;

  // Add to state table
  FTable.Add(prsNode);

end;

procedure TRegExpr.ParseAnyChar;
var  prsNode:    PRegState;
begin

  // Push next
  Inc(FPos);

  // Add the new state record
  prsNode:=AllocState(rtAnyChar);
  FTable.Add(prsNode);

end;

procedure TRegExpr.ParsePostModifier(State: PRegState);
begin

  // Set default post modifier
  State^.regPost:=rpOne;

  // If not a post modifier then just exit
  if not(FPos^ in ['*', '?', '+', '{']) then exit;

  // Set the post modifier
  case FPos^ of
     '*'   :  State^.regPost:=rpZMore;
     '?'   :  State^.regPost:=rpZOne;
     '+'   :  State^.regPost:=rpOneMore;
     '{'   :  State^.regPost:=rpExact;
  end;

  // Push next
  Inc(FPos);

  // If *, ?, or + then we are done
  if (State^.regPost <> rpExact) then exit;

  // Get the first number
  State^.dwMin:=ParseNum;

  // Check for end of expression
  if (FPos^ = #0) then RaiseError(3, True);

  // Next character needs to be either a , or }
  case FPos^ of
     ','   :
     begin
        // Push next
        Inc(FPos);
        // Check for end of expression
        if (FPos^ = #0) then RaiseError(3, True);
        // We may have either a closing } or a numeric
        if (FPos^ = '}') then
           State^.regPost:=rpCMore
        else
        begin
           // Going to be a min to max range
           State^.regPost:=rpCtoC;
           // Get max range count
           State^.dwMax:=ParseNum;
           // Check for end of expression
           if (FPos^ = #0) then RaiseError(3, True);
           // Min has to be less than Max
           if (State^.dwMin >= State^.dwMax) then RaiseError(8, True);
           // Check for closing }
           if (FPos^ <> '}') then RaiseError(9, True);
        end;
     end;
     '}'   :  if (State^.dwMin = 0) then RaiseError(10, True);
  else
     RaiseError(9, True);
  end;

  // Push next
  Inc(FPos);

end;

function TRegExpr.ParseNum: Integer;
var  lpszHold:   PChar;
     cMark:      Char;
     dwErr:      Integer;
begin

  // Save the current position
  lpszHold:=FPos;

  // Better be a number waiting
  if not(FPos^ in ['0'..'9']) then RaiseError(11, True);

  // Keep incrementing while numeric
  while (FPos^ in ['0'..'9']) do Inc(FPos);

  // Terminate and convert to number
  cMark:=FPos^;
  FPos^:=#0;
  Val(lpszHold, result, dwErr);
  FPos^:=cMark;

  // Check for error
  if (dwErr > 0) then
  begin
     FPos:=lpszHold;
     Inc(FPos, dwErr);
     RaiseError(12, True);
  end;

end;

procedure TRegExpr.ResetAll;
begin

  // Reset scanning positions
  FCursor:=FSource;
  FAnchor:=FSource;

  // Reset matched state
  FMatched:=False;
  FMatchPos:=0;
  FMatchLen:=0;

end;

procedure TRegExpr.ResetMatched;
begin

  // Set anchor and cursor to equal source
  FMatched:=False;
  FMatchPos:=0;
  FMatchLen:=0;

end;

procedure TRegExpr.SetMatchCase(CaseSensitive: Boolean);
begin

  // Check current setting
  if (CaseSensitive <> FMatchCase) then
  begin
     // Set new case sensitive setting
     FMatchCase:=CaseSensitive;
     // Reparse if we were in a parsed state
     if FParsed then ParseExpression;
  end;

end;

function TRegExpr.GetSource: PChar;
begin

  // Return the source
  result:=FSource;

end;

function TRegExpr.GetPattern: PChar;
begin

  // Return the pattern
  result:=FPattern;

end;

procedure TRegExpr.SetSource(Source: PChar);
begin

  // Assign the new source
  FreeMem(FSource);
  FSource:=AllocMem(Succ(StrLen(Source)));
  StrCopy(FSource, Source);

  // Source has changed, update the new state
  ResetAll;

end;

procedure TRegExpr.SetPattern(Pattern: PChar);
begin

  // Assign the new pattern
  FreeMem(FPattern);
  FPattern:=AllocMem(Succ(StrLen(Pattern)));
  StrCopy(FPattern, Pattern);

  // Reset parsed state
  FParsed:=False;

  // Parse the expression
  ParseExpression;

  // Set parsed state to true
  FParsed:=True;

end;

procedure TRegExpr.FreeState(State: PRegState);
begin

  // Is the state record assigned?
  if Assigned(State) then
  begin
     // If char class is allocated then we need to free the memory for the char set
     if (State^.regType in [rtClass, rtNegClass]) then FreeMem(State^.lpClass);
     // Free the state record
     FreeMem(State);
  end;

end;

procedure TRegExpr.ClearStateTable;
var  dwIndex:    Integer;
begin

  // Clear all the NFA state records
  for dwIndex:=Pred(FTable.Count) downto 0 do FreeState(FTable[dwIndex]);

  // Clear the table
  FTable.Clear;

end;

function TRegExpr.AllocState(StateType: TRegType): PRegState;
begin

  // Allocate a new regx state record
  result:=AllocMem(SizeOf(TRegState));

  // Set the type
  result^.regType:=StateType;

end;

constructor TRegExpr.CreatePattern(Pattern: PChar);
begin

  // Perform inherited for TRegExpr
  Create;

  // Set the pattern
  SetPattern(Pattern);

end;

constructor TRegExpr.Create;
begin

  // Perform inherited
  inherited Create;

  // Initialization
  FPattern:=AllocMem(1);
  FSource:=AllocMem(1);
  FError:=AllocMem(256);
  FPos:=FPattern;
  FAncStart:=False;
  FAncEnd:=False;
  FMatchCase:=False;
  FTable:=TList.Create;
  FParsed:=False;

  // Reset to default (unmatched) state
  ResetAll;

end;

destructor TRegExpr.Destroy;
begin

  // Clear the state records
  ClearStateTable;

  // Free the state table list
  FTable.Free;

  // Free all allocated memory
  FreeMem(FPattern);
  FreeMem(FSource);
  FreeMem(FError);

  // Perform inherited
  inherited Destroy;

end;

end.
0
DragonSlayerCommented:
Wow Russell... your own reg exp? You must be too free :-P
0
It's more than this solution.Get answers and train to solve all your tech problems - anytime, anywhere.Try it for free Edge Out The Competitionfor your dream job with proven skills and certifications.Get started today Stand Outas the employee with proven skills.Start learning today for free Move Your Career Forwardwith certification training in the latest technologies.Start your trial today
Delphi

From novice to tech pro — start learning today.

Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.