
// SECTION "EF5"   // Last modified 83-05-31

/* The procedures in this section deal with pattern matching. and
with the  replacement  or  saving of patterns.  The pattern to be
matched is in 'pat' and the matching is done on 'line'.    Except
for  'start_ef5',  the procedures are listed in alphabetic order.
*/

GET "ef0.h"

LET start_ef5(n) BE
{ // writes("<>start_ef5:*N")
  check_system(n, computer, 5); start_ef6(n) }

LET alter_case(pos) = VALOF
/*  Change the 'len_match' characters at 'pos' on 'line' to upper
case or vice versa according as 'modifier' is 'U' or 'L'.   Yield
the index of the character beyond the replacement.  */
TEST ~ menu_rlu THEN RESULTIS 0 ELSE
{1 FOR i = 1 TO len_match DO
   {F LET c = line%pos
      line%pos := (modifier = 'U') -> upper_case(c),
                  ('A' <= c <= 'Z') -> (c + 'a' - 'A'), c
      pos := pos + 1 }F
   RESULTIS pos }1

AND closure_length(q, p, stride, line_max) = VALOF
/* Yields the maximum length of a closure match of the element at
'p' in 'pat' found at 'q' in 'line'.  If the pattern  element  at
'p'  is  a  class  then  'stride'  is  its  length  and  is >= 2;
otherwise, the stride is 1.  */
TEST ~ menu_closure THEN RESULTIS 0 ELSE
{1 // trace("closure_length: q=%N,p=%N,stride=%N",q,p,stride)
 { LET q1, qmax, pch = q, line_max, pat%p
   IF menu_class THEN
     IF stride > 1 THEN              // character class
     {CL LET member_ = (pch = include_symbol)
         AND p2, pstride = p + 2, p + stride
         WHILE q1 <= qmax DO
           TEST in_class(line%q1, p2, pstride, member_)
           THEN q1 := q1 + 1 ELSE BREAK
         RESULTIS (q1-q) }CL
   // stride = 1
   IF pch = any_symbol THEN RESULTIS (qmax+1-q)
   WHILE q1 <= qmax DO
     TEST pch = line%q1 THEN q1 := q1 + 1 ELSE BREAK
   RESULTIS (q1-q) }1

AND confirmed() = VALOF
/*  Reads  a  line  from  the  console; returns true if the first
significant character is 'Y', or 'y', else returns false.  */
TEST ~ menu_ri THEN RESULTIS FALSE ELSE
{1 // trace("confirmed:")
   { LET save_line = VEC line_csz
     AND save_tag = menu_t -> cur_tag, ?
     AND got_, res = ?, FALSE

     copy_string(line,save_line)
     got_ := got_text(FALSE, FALSE)
     IF menu_t THEN cur_tag := save_tag
     IF got_ THEN
       FOR i = 1 to line%0 DO SWITCHON line%i INTO
       { CASE 'Y': CASE 'y':  res := TRUE
         CASE '*S': CASE '*T':  LOOP
       }
     copy_string(save_line,line)
     UNLESS got_ DO recover_eof()
     RESULTIS res
   }
}1

AND display_pat() BE
/* Reconstructs and prints 'pat' as it was typed by the user.  */
TEST ~ menu_dsplpat THEN writef("/%S/",pat) ELSE
{ LET closure_pending_ = FALSE
  wrch('/')
  FOR i = 1 TO pat%0 - 1 DO
  { LET c = pat%i
    SWITCHON c INTO
    { CASE exclude_symbol:
      CASE include_symbol:
      { LET stride = pat%(i+1)
        IF c = exclude_symbol THEN wrch('~')
        i := i + 2
        IF stride > 3 THEN wrch(lbrac_symbol)
        FOR j = 0 to stride - 3 DO
        { LET cc = pat%(i+j)
          SWITCHON cc INTO
          { CASE any_symbol:   cc := '.';             ENDCASE
            CASE bol_symbol:   cc := carat_symbol
                                                      ENDCASE
            CASE eol_symbol:   cc := '$';             ENDCASE
            CASE range_symbol: dump(pat%(i+j+1)); wrch('-')
                               cc := pat%(i+j+2)
                               j := j + 2;            ENDCASE
          }
          dump(cc)
        }
        IF stride > 3 THEN wrch(rbrac_symbol)
        i := i + stride - 3
        LOOP
      }
      CASE any_symbol:  c := '.';                      ENDCASE
      CASE bol_symbol:  c := carat_symbol;             ENDCASE
      CASE eol_symbol:  c := '$';                      ENDCASE
      CASE closure_symbol:  closure_pending_ := TRUE; LOOP
    }
    dump(c)
    IF closure_pending_ THEN
    { wrch('**')
      closure_pending_ := FALSE
    }
  }
  IF closure_pending_ THEN wrch('**')
  wrch('/')
}

AND in_class(line_ch, p, p_next, member_) = VALOF
/*  Yields true if the 'line_ch' is accepted by the elements of a
class beginning at 'p' on 'pat' and  up  to,  but  not  including
'p_next'.  Membership is required if 'member_' is true, otherwise
non-membership is  required.  The elements may include a range in
the   format   0:   'range_symbol',   1:   lower-bound-char,   2:
upper-bound-char.  */
TEST ~ menu_class THEN RESULTIS TRUE ELSE
{1 // trace("in_class: line_ch=%C p=%N, p_next=%N,*
   //   *member_=%N", line_ch, p, p_next, member_)
   WHILE p < p_next DO
   {W LET pch = pat%p
      LET low_ch, high_ch = pch, pch
      IF pch = range_symbol THEN
      { p := p+1; low_ch := pat%p; p := p+1; high_ch := pat%p }
      IF low_ch <= line_ch <= high_ch THEN RESULTIS member_
      p := p + 1 }W
   RESULTIS ~ member_ }1

AND lsubstitute(i, all_, interactive_, occurrence) = VALOF
/* On the 'i'th line, substitute 'new_string' for the appropriate
(as  given  by  'occurrence') match of 'pat' after 'l_margin' (or
all matches if 'all_' is true),  yielding  the  number  of  lines
stored  if  there was a match, otherwise yielding zero.  If there
was a match then the line(s) are written out, and  'cur_line'  is
set  to  the last line written.  When the modifier is 'L' or 'U',
no replacement occurs but the case of letters is changed to lower
or upper respectively.  If 'interactive_' is true,  the  user  is
asked  to  confirm each substitution, but cur_line is set anyway.
*/
{1 // trace("lsubstitute: i=%N all_=%N interactive_=%N occ=%N",
   // i, all_, interactive_, occurrence)
 { LET pos = l_margin AND replaced_ = FALSE AND multiple_ = FALSE
   AND extra_lines = 0
   fetch_line(i)
   {R pos := scan_line(pos)
      IF (pos<0) LOGOR (replaced_ LOGAND len_match=0) THEN BREAK
      IF menu_rn THEN
      { IF occurrence > 1 THEN
        { occurrence := occurrence - 1
          pos := pos + 1
          multiple_ := TRUE
          LOOP }
        multiple_ := FALSE }
      TEST menu_rlu THEN
        pos := (modifier = 'L' LOGOR modifier = 'U') ->
                 alter_case(pos), replace(pos,interactive_)
      ELSE
        pos := replace(pos,interactive_)
      replaced_ := TRUE }R
   REPEATWHILE (menu_rn -> all_ LOGOR multiple_, all_)
   IF replaced_ THEN
   { LET len, j = line%0, 1
     FOR k = 1 TO len DO IF line%k = '*N' THEN
       extra_lines := extra_lines + 1
     IF extra_lines NE 0 THEN
     { expand(i,extra_lines)
       WHILE j <= len DO
       { TEST line%j = '*N' THEN
         { LET tempv = VEC line_csz
           IF menu_control THEN IF i<0 THEN warn(m_split_control)
           len := len-j
           copy_bytes(len, line, j+1, tempv, 1)
           tempv%0 := len
           line%0 := j-1
           store_line(i); cur_tag := null; i := i+1
           copy_string(tempv, line)
           j := 1
         }
         ELSE j := j + 1
       }
     }
     UNLESS i < 0 THEN cur_line := i
     store_line(i)
   }
   RESULTIS replaced_ -> extra_lines+1, 0 }1

AND omatch(q, p, stride) = VALOF
/* Matches a single  character  at  'q'  in  'line'  against  the
element at  'p'  in 'pat'.  If the match is against a class, then
'stride>1' and this is needed by  'in_class'.    Yields  true  if
there is  a  match,  otherwise  yields  false.  Note that this is
never called with 'q>line%0'. */
{1 // trace("omatch: q=%N p=%N stride=%N",
   //   q, p, stride)
 { LET pch = pat%p
   SWITCHON pch INTO
   {S CASE eol_symbol: RESULTIS FALSE      // q <= line_max
      CASE any_symbol: RESULTIS TRUE
      CASE include_symbol: CASE exclude_symbol:
        IF menu_class THEN RESULTIS
          in_class(line%q, p+2, p+stride, (pch=include_symbol))
      DEFAULT: RESULTIS no_case_ ->
        (upper_case(pch)=upper_case(line%q)), pch=line%q }S }1

AND qmatch(line,t1,t2,patn,p1,p2) = VALOF
/*  A  fast  version  of  rmatch for simple patterns.  Performs a
match of the string in 'patn' in the inclusive range  of  indices
'p1'  to  'p2',  against the string in 'line'. The first match is
attempted at index 't1' of 'line' and the last attempt is made at
index 't2'. The character at index 'p2' of 'patn' is  a  sentinel
so that the fastest possible search loop is produced. */
{1 // trace("qmatch:")
   FOR i = t1 TO t2 DO
   {f LET j, k = i, p1
      WHILE (line%j = patn%k) DO j, k := j + 1, k + 1
      IF (k = p2) THEN RESULTIS i }f
   RESULTIS -1 }1

AND replace(pos, interactive_) = VALOF
/*  Replace  the  'len_match'  characters  at  'pos' on 'line' by
'new_string', allowing for the action of the 'match_symbol'  (&).
If   'interactive_'   is  true,  ask  the  user  to  confirm  the
substitution.  Yield  the  index  of  the  character  beyond  the
replacement.  This is used by the operator R.  */
{1 // trace("replace: pos=%N interactive_=%N", pos, interactive_)
 { LET match_pos = pos
   AND old_line = VEC line_csz
   AND len_tail = line%0 - len_match - pos + 1
   IF menu_ri THEN
     IF interactive_ THEN
     { writes(line); newline()
       FOR i = 1 TO pos-1 DO wrch(line%i = '*T' -> '*T', '*S')
       FOR i = 1 TO len_match DO wrch(carat_symbol); newline()
       UNLESS confirmed() DO RESULTIS pos+1
     }
   copy_string(line, old_line)
   FOR i = 1 TO new_string%0 DO
   {F LET c = new_string%i
      TEST c = match_symbol THEN
      { IF pos + len_match > line_bsz + 1 THEN
                 warn(m_text_too_long)
        copy_bytes(len_match, old_line, match_pos, line, pos)
        pos := pos + len_match }
      ELSE
      { IF pos > line_bsz+1 THEN warn(m_text_too_long)
        line%pos := c; pos := pos + 1  } }F
   IF pos + len_tail>line_bsz+1 THEN warn(m_text_too_long)
   copy_bytes(len_tail,old_line,match_pos + len_match,line,pos)
   line%0 := pos + len_tail - 1
   RESULTIS pos }1

AND rmatch(b, q, p, line_max) = VALOF
/* Matches 'pat' against 'line' at 'b', assuming it  is  done  to
'p' on  'pat'  and  to  'q'  on 'line'.  If there is a successful
match, it yields the number of  characters  (from  'b')  matched,
otherwise yields  -1.  The function is complicated by the need to
deal with closures (repetitions), which it  handles  recursively.
A  closure  in  'pat'  has  the  format  0:  'closure_symbol', 1:
element-to-be-repeated.  A pattern element may also  be  a  class
which is in the format 0: 'include/exclude_symbol', 1: stride, 2:
class elements. Note that, for a class, stride >= 2.  */
{1 // trace("rmatch: b=%N q=%N p=%N", b, q, p)
 { LET closure_ = ?
   IF p >= pat%0 THEN                 // pattern exhausted
     RESULTIS (q-b)                  // success
   TEST menu_closure THEN
   {CL closure_ := (pat%p = closure_symbol)
       IF q > line_max THEN
         UNLESS closure_ THEN
           RESULTIS (pat%p = eol_symbol) -> (q-b), -1
       IF closure_ THEN p := p + 1 }CL // step over closure
   ELSE   // The code is simpler without closure to worry about
     IF q > line_max THEN RESULTIS -1
 { LET stride = menu_class -> ((pat%p=include_symbol) LOGOR
     (pat%p=exclude_symbol) -> pat%(p+1), 1), 1
   IF menu_closure THEN
     IF closure_ THEN
     {C FOR q1 = q+closure_length(q,p,stride,line_max) TO
          q BY -1 DO  // closure
        {F LET n = rmatch(b, q1, p+stride, line_max)
           IF n >= 0 THEN RESULTIS n }F
        RESULTIS -1 }C
   UNLESS omatch(q, p, stride)       // non closure
     THEN RESULTIS -1
   p := p + stride; q := q + 1  }1
REPEAT                               // non closure repeat

AND scan_file(state, negated_) = VALOF
/*  If the 'state' corresponds to a pattern, then scan for a line
matching 'pat', forwards if 'state' is '/'; otherwise  backwards.
If  the  state  corresponds  to a marker, then scan for a line so
tagged, forwards  if  state  is  '*'';  otherwise  backwards.  If
'negated_'  is  true,  the  search is judged successful if a line
fails to match.    Note  that  the  scan  starts  one  line  from
'cur_line' and  will  wrap  around  if necessary.  Yield the line
number if successful, otherwise give a warning  message  that  it
cannot be found.  */
{1 // trace("scan_file: state=%C", state)
 { LET n, step, wrap = cur_line, +1, 1
   IF last_line = 0 THEN warn(m_empty)
   UNLESS state='/' LOGOR state='*'' THEN
                 step, wrap := -1, last_line
   FOR i = 1 TO last_line DO         // gives the correct count
   {F n := n + step; check_interrupt()
      UNLESS 1 <= n <= last_line THEN n := wrap
      fetch_line(n)
      SWITCHON state INTO
      {S CASE '/': CASE under_symbol:  // pattern match
           IF scan_line(l_margin) > 0 NEQV negated_ THEN
             RESULTIS n;                          ENDCASE
         CASE '*'': CASE accent_grave: // marker
           IF menu_t THEN
             IF cur_tag = test_tag NEQV negated_ THEN
               RESULTIS n  }S  }F
   warn(m_not_found) }1

AND scan_line(org) = VALOF
/* Is 'pat' found in 'line' at 'org' or beyond?   The  search  is
carried out  only  between 'l_margin' and 'r_margin'.  Yields the
index of the match if true; otherwise -1.  Modifies  'len_match',
the matched length, as a side effect.  */
{1 // trace("scan_line: org=%N", org)
 { LET line_max = (line%0 > r_margin) -> r_margin, line%0
   AND len = pat%0
   LET bol_, eol_ = (pat%1 = bol_symbol),
     (pat%(len-1) = eol_symbol)
   LET p = bol_ -> 2, 1
   TEST (pat%len = sentinel) LOGAND ~ no_case_ THEN
   {Q line%(1+line%0) := null
      IF eol_ THEN len := len - 1
      len_match := len - p
    { LET last = line_max - len_match + 1
      LET t1, t2 = org, last
      IF bol_ LOGAND (last > l_margin) THEN t2 := l_margin
      IF eol_ LOGAND (org < last) THEN t1 := last
    { LET posn = qmatch(line,t1,t2,pat,p,len)
      IF (posn < 0) THEN len_match := -1
      RESULTIS posn }Q
   ELSE
   FOR i = org TO bol_ -> l_margin, 1+line_max DO
   { len_match := rmatch(i, i, p, line_max)
     IF len_match >= 0 THEN RESULTIS i }
   RESULTIS -1 }1

AND split_line(i, all_) BE
/* Fetch  line  'i'  and  segment it using the pattern 'pat'.  If
'all_' is true, the segments are defined by  all  occurrences  of
'pat' otherwise only by the first.  The segments are saved in the
control lines specified in 'tmp_name' in order.  */
TEST ~menu_control THEN RETURN ELSE
{1 // trace("split_line: i=%N", i)
   fetch_line(i)
 { LET pos = scan_line(l_margin) AND cl_i = 1
   IF pos < 0 THEN warn(m_not_found)
   {R store_seg(pos-1, cl_i); store_seg(len_match, cl_i+1)
      cl_i := cl_i + 2
      pos := all_ -> scan_line(1), -1
      IF len_match = 0 THEN BREAK }R
   REPEATUNTIL pos < 0
   store_seg(line%0, cl_i) }1

AND store_seg(n, i) BE
/* Stores the 'n' characters  at  the  start  of  'line'  in  the
control  specified by 'tmp_name%i' and then shifts 'line' left by
'n' characters.    If  'tmp_name' contains too few characters, or
the 'n'th is not a valid  control  specifier,  no  storing  takes
place.  */
{1 TEST ~ menu_control THEN RETURN ELSE
   { LET tail = line%0 - n
     IF i <= tmp_name%0 THEN
     {2 LET c = -uc_char_number(tmp_name%i, control_chars)
        IF c < 0 THEN
        { line%0 := n; store_line(c) }2
     copy_bytes(tail, line, n+1, line, 1); line%0 := tail }1

AND substitute(all_, interactive_, occurrence) BE
/* Substitute 'new_string'  for  the  appropriate  (as  given  by
'occurrence')  match  of 'pat', if any, (or all matches if 'all_'
is true) on each line in the l_range 'l_line1' to 'l_line2'.   If
'interactive_'  is  true,  the  user  is  asked  to  confirm each
substitution. Each time a potential  or  actual  substitution  is
made  to  a work-space line, 'cur_line' is set.  If 'modifier' is
'L' or 'U', just change the case of letters in the matched string
to lower or upper respectively.  */
{1 // trace("substitute: all_=%N int=%N occ=%N",
   //       all_,interactive_,occurrence)
   TEST menu_control -> l_got = control, FALSE THEN
      UNLESS
     lsubstitute(l_line1,all_,interactive_,occurrence) > 0 THEN
         warn(m_not_found)
   ELSE
   {W LET found_ = FALSE
      AND i = l_line1
      AND j, extra_ = ?, FALSE
      WHILE i <= l_line2 DO
      { check_interrupt()
        j := lsubstitute(i, all_, interactive_, occurrence) - 1
                          // Number of extra lines
        IF j >= 0 THEN
        { found_ := TRUE; i := cur_line
          IF j > 0 THEN { extra_ := TRUE; l_line2 := l_line2+j }
        }
        i := i + 1
      }
      UNLESS menu_x -> (found_ LOGOR x_state_), found_
        THEN warn(m_not_found)
      IF menu_u THEN
         IF extra_ THEN
            modifier := 'Z'   // For post_trail
  }W
}1

.
