/* -*- Mode:C; Coding:us-ascii-unix; fill-column:132 -*- */
/**********************************************************************************************************************************/
/**
   @file      regex_example.c
   @author    Mitch Richling <https://www.mitchr.me/>
   @Copyright Copyright 1994,2014 by Mitch Richling.  All rights reserved.
   @brief     UNIX regex tools@EOL
   @Keywords  UNIX regular expressions regex
   @Std       ISOC POSIX.2 (IEEE Std 103.2) BSD4.3
   @Tested    
              - Solaris 2.8
              - MacOS X.2
              - Linux (RH 7.3)

   This is an example program intended to illustrate very basic use of regular expressions.
  
   Grumpy programmer note: IEEE Std 1003.2, generally referred to as 'POSIX.2' is a bit vague regarding several details like how
   back references work.  It also has a couple of errors (like how a single ')' is treated in a regular expression.  Because of
   this, most actual implementations of the standard will have several minor inconsistencies that one must watch out for.  My best
   advice is to "read the man page" on the platforms you wish to run on and to avoid exotic things.  For example, avoid things like
   the BSD REG_NOSPEC and REG_PEND options.  Another option is to simply carry your favorite regular expression library with you.
   For example, C++11 has very good regex support, and the BOOST library has a very nice regex class for older C++ versions.  PCRE
   is probably the most popular alternative, FOSS regular expression library available.
***********************************************************************************************************************************/

#include <sys/types.h>          /* UNIX types      POSIX */
#include <regex.h>              /* Regular Exp     POSIX */
#include <stdio.h>              /* I/O lib         C89   */
#include <string.h>             /* Strings         C89   */
#include <stdlib.h>             /* Standard Lib    C89   */

/**********************************************************************************************************************************/
#define MAX_SUB_EXPR_CNT 256
#define MAX_SUB_EXPR_LEN 256
#define MAX_ERR_STR_LEN  256

/**********************************************************************************************************************************/
int main(int argc, char *argv[]) {
  int i;                                /* Loop variable.                          */
  char p[MAX_SUB_EXPR_LEN];             /* For string manipulation                 */
  regex_t aCmpRegex;                    /* Pointer to our compiled regex           */
  char *aStrRegex;                      /* Pointer to the string holding the regex */
  regmatch_t pMatch[MAX_SUB_EXPR_CNT];  /* Hold partial matches.                   */
  char **aLineToMatch;                  /* Holds each line that we wish to match   */
  int result;                           /* Return from regcomp() and regexec()     */
  char outMsgBuf[MAX_ERR_STR_LEN];      /* Holds error messages from regerror()    */
  char *testStrings[] = { "This should match... hello",
                          "This could match... hello!",
                          "More than one hello.. hello",
                          "No chance of a match...",
                          NULL};

  /* use aStrRegex for readability. */
  aStrRegex = "(.*)(hello)+";
  printf("Regex to use: %s\n", aStrRegex);

  /* Compile the regex */
  if( (result = regcomp(&aCmpRegex, aStrRegex, REG_EXTENDED)) ) {
    printf("Error compiling regex(%d).\n", result);
    regerror(result, &aCmpRegex, outMsgBuf, sizeof(outMsgBuf));
    printf("Error msg: %s\n", outMsgBuf);
    exit(1);
  } /* end if */

  /*  Possible last argument to regcomp (||'ed together):
        REG_EXTENDED  Use extended regular expressions
        REG_BASIC     Use basic regular expressions
        REG_NOSPEC    Special character support off (Not POSIX.2)
        REG_ICASE     Ignore upper/lower case distinctions
        REG_NOSUB     No sub-strings (just check for match/no match)
        REG_NEWLINE   Compile for newline-sensitive matching
        REG_PEND      Specify alternate string ending (Not POSIX.2) */


  /* Apply our regular expression to some strings. */
  for(aLineToMatch=testStrings; *aLineToMatch != NULL; aLineToMatch++) {
    printf("String: %s\n", *aLineToMatch);
    printf("        %s\n", "0123456789012345678901234567890123456789");
    printf("        %s\n", "0         1         2         3");
    /* compare and check result (MAX_SUB_EXPR_CNT max sub-expressions).*/
    if( !(result = regexec(&aCmpRegex, *aLineToMatch, MAX_SUB_EXPR_CNT, pMatch, 0)) ) {
      /* Last argument to regexec (||'ed together):
         REG_NOTBOL    Start of the string is NOT the start of a line
         REG_NOTEOL    $ shouldn't match end of string (gotta have a newline)
         REG_STARTEND  Not POSIX.2 */
      printf("Result: We have a match!\n");
      for(i=0;i<=(int)aCmpRegex.re_nsub;i++) {
        printf("Match(%2d/%2d): (%2d,%2d): ", 
               i, 
               (int)(aCmpRegex.re_nsub), 
               (int)(pMatch[i].rm_so), 
               (int)(pMatch[i].rm_eo));

          if( (pMatch[i].rm_so >= 0) && (pMatch[i].rm_eo >= 1) && 
              (pMatch[i].rm_so != pMatch[i].rm_eo) ) {
            strncpy(p, &((*aLineToMatch)[pMatch[i].rm_so]), pMatch[i].rm_eo-pMatch[i].rm_so);
            p[pMatch[i].rm_eo-pMatch[i].rm_so] = '\0';
            printf("'%s'", p);
          } /* end if */
          printf("\n");
      } /* end for */
      printf("\n");
    } else {
      switch(result) {
        case REG_NOMATCH   : printf("String did not match the pattern\n");                   break;
        ////Some typical return codes:
        //case REG_BADPAT    : printf("invalid regular expression\n");                         break;
        //case REG_ECOLLATE  : printf("invalid collating element\n");                          break;
        //case REG_ECTYPE    : printf("invalid character class\n");                            break;
        //case REG_EESCAPE   : printf("`\' applied to unescapable character\n");               break;
        //case REG_ESUBREG   : printf("invalid backreference number\n");                       break;
        //case REG_EBRACK    : printf("brackets `[ ]' not balanced\n");                        break;
        //case REG_EPAREN    : printf("parentheses `( )' not balanced\n");                     break;
        //case REG_EBRACE    : printf("braces `{ }' not balanced\n");                          break;
        //case REG_BADBR     : printf("invalid repetition count(s) in `{ }'\n");               break;
        //case REG_ERANGE    : printf("invalid character range in `[ ]'\n");                   break;
        //case REG_ESPACE    : printf("Ran out of memory\n");                                  break;
        //case REG_BADRPT    : printf("`?', `*', or `+' operand invalid\n");                   break;
        //case REG_EMPTY     : printf("empty (sub)expression\n");                              break;
        //case REG_ASSERT    : printf("can't happen - you found a bug\n");                     break;
        //case REG_INVARG    : printf("A bad option was passed\n");                            break;
        //case REG_ILLSEQ    : printf("illegal byte sequence\n");                              break;
        default              : printf("Unknown error\n");                                      break;
      } /* end switch */
      regerror(result, &aCmpRegex, outMsgBuf, sizeof(outMsgBuf));
      printf("Result: Error msg: %s\n\n", outMsgBuf);
    } /* end if/else */
  } /* end for */
  
  /* Free up resources for the regular expression */
  regfree(&aCmpRegex);

  exit(0);
} /* end func main */