;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: REGEX; Base: 10 -*- (in-package "REGEX") ;;; ;;; Pass 3 - Canonicalization/tree rewrites/simplification ;;; ; (class "c") --> #\c ; (seq ... ...) --> (seq ...) ; (seq a b (seq c d) e f) --> (seq a b c d e f) ; (alt a b (alt c d) e f) --> (alt a b c d e f) ; (reg n ) --> (seq (rstart n) (rend n)) ; (opt a) --> (alt a nil) or (alt nil a) ; Iteratively canonicalize the tree, until it stabilizes, then simplify (defun optimize-regex-tree (tree) (let* ((canonical (canonicalize tree)) (better (improve canonical)) (even-better (split-alts better)) (simple (simplify even-better))) simple)) (defun canonicalize (tree) (when *regex-compile-verbose* (format t "~&~%Canonicalize Start:") (pprint tree)) (loop with prev-tree = tree for pass from 1 for new-tree = (canonicalize-once prev-tree) when (or (equal prev-tree new-tree) (> pass 10)) return new-tree do (progn (when *regex-compile-verbose* (format t "~&Canonicalize Pass ~D:" pass) (pprint new-tree)) (setq prev-tree new-tree)))) (defun improve (tree) (when *regex-compile-verbose* (format t "~&~%Optimize Start:") (pprint tree)) (loop with prev-tree = tree for pass from 1 for new-tree = (improve-once prev-tree) when (or (equal prev-tree new-tree) (> pass 10)) return new-tree do (progn (when *regex-compile-verbose* (format t "~&Optimize Pass ~D:" pass) (pprint new-tree)) (setq prev-tree new-tree)))) (defun split-alts (tree) (when *regex-compile-verbose* (format t "~&~%Split Alts Start:") (pprint tree)) (split-alts-aux tree)) (defun simplify (tree) (when *regex-compile-verbose* (format t "~&~%Simplify Start:") (pprint tree)) (loop with prev-tree = tree for pass from 1 for new-tree = (simplify-once prev-tree) when (or (equal prev-tree new-tree) (> pass 10)) return new-tree do (progn (when *regex-compile-verbose* (format t "~&Simplify Pass ~D:" pass) (pprint new-tree)) (setq prev-tree new-tree)))) ; for ALT's, after hoisting, try partitioning the set on the leading element, ; and see if we can't reduce things further: ; (alt (seq a) (seq a c) (seq c)) -> (alt (seq a (alt nil (seq c))) (seq c)) ; --> (alt-case (a (alt nil (seq c))) ; (c )) ; ; for Kleene's, check to see if the child node is also a kleene w/ no ; registers, and if so remove the inner kleene. ; (defun canonicalize-once (node) (cond ;; expand strings into sequences of char to enable more optimizations ((string-node-p node) (make-seq-node-list (coerce (string-node-string node) 'list))) ;; expand class-sequences into sequences of charclass ((classseq-node-p node) (make-seq-node-list (mapcar #'make-charclass-node (classseq-node-seq node)))) ((seq-node-p node) (let* ((canonicalchildren (mapcar #'canonicalize-once (seq-node-children node))) (denullchildren (remove-if #'null canonicalchildren)) (flatchildren (flatten-sequence denullchildren)) (children flatchildren) (numchildren (length flatchildren))) (cond ((zerop numchildren) nil) ((= numchildren 1) (first children)) (t (make-seq-node-list children))))) ((alt-node-p node) (let* ((children (mapcar #'canonicalize-once (alt-node-children node))) (flatchildren (flatten-alt children)) (uniquechildren (remove-duplicates flatchildren :from-end t)) (children uniquechildren)) (make-alt-node-list children))) ((kleene-node-p node) (make-kleene-node (canonicalize-once (kleene-node-child node)) (kleene-node-greedy-p node))) ;; If the child node is something trivial like char, seq of char, ;; any, charclass, specclass,or seq of charclass, may want to go ;; ahead and leave it as a + node and specialize it during the ;; instruction selection pass. ((pkleene-node-p node) (let ((greedyp (pkleene-node-greedy-p node)) (canonical-child (canonicalize-once (pkleene-node-child node)))) (make-seq-node-args canonical-child (make-kleene-node (cond (*registers-match-rightmost* canonical-child) (t (canonicalize-once (unregister canonical-child)))) greedyp)))) ((optional-node-p node) (let ((greedyp (optional-node-greedy-p node)) (canonical-child (canonicalize-once (optional-node-child node)))) (cond (greedyp (make-alt-node-args canonical-child nil)) (t (make-alt-node-args nil canonical-child))))) ((charclass-node-p node) (let* ((negp (charclass-node-negated-p node)) (chars (charclass-node-chars node)) (cclen (length chars))) (cond ((zerop cclen) nil) ((and (= cclen 1) (not negp)) (make-char-node (char chars 0))) (t node)))) ((register-node-p node) (let ((regnum (register-node-regnum node))) (make-seq-node-args (make-regstart-node regnum) (canonicalize-once (register-node-child node)) (make-regend-node regnum)))) ((range-node-p node) (expand-range (range-node-greedy-p node) (range-node-min node) (range-node-max node) (canonicalize-once (range-node-child node)))) (t node))) (defun improve-once (node) (cond ((alt-node-p node) (let ((children (mapcar #'improve-once (alt-node-children node)))) (multiple-value-bind (prefix altbody suffix) (hoist-alt-ends children) (cond (altbody (make-seq-node-list `(,@prefix ,(make-alt-node-list altbody) ,@suffix))) ((and (or prefix suffix) (null altbody)) (make-seq-node-list `(,@prefix ,@suffix))) (t (make-alt-node-list altbody)))))) ((seq-node-p node) (make-seq-node-list (mapcar #'improve-once (seq-node-children node)))) ((kleene-node-p node) (let* ((greedyp (kleene-node-greedy-p node)) (child (improve-once (kleene-node-child node))) (hasregs (contains-registers-p child))) (cond ((or *registers-match-rightmost* (not hasregs)) (make-kleene-node child greedyp)) (greedyp (let ((more (make-kleene-node (unregister child) greedyp))) (make-alt-node-args (make-seq-node-args child more) nil))) (t ;; not greedy (let ((more (make-kleene-node (unregister child) greedyp))) (make-alt-node-args nil (make-seq-node-args child more))))))) (t node))) (defun split-alts-aux (node) (cond ((seq-node-p node) (let ((children (mapcar #'split-alts-aux (seq-node-children node)))) (make-seq-node-list children))) ((alt-node-p node) (let ((children (mapcar #'split-alts-aux (alt-node-children node))) (num-children (alt-node-numchildren node))) (multiple-value-bind (unknown-char known-char-sets) (partition-on-leading-char children) (let* ((num-unknown-char (length unknown-char)) (num-known-char-sets (length known-char-sets)) (worth-partitioning-p (worth-alt-case-partitioning-p num-children num-unknown-char num-known-char-sets))) (cond ((and unknown-char known-char-sets worth-partitioning-p) (make-alt-node-list `(,(make-casealt-node-list (mapcar #'subalt-if-necessary known-char-sets)) ,@unknown-char))) ((and (not unknown-char) known-char-sets worth-partitioning-p) (make-casealt-node-list (mapcar #'subalt-if-necessary known-char-sets))) (t node)))))) ((kleene-node-p node) (make-kleene-node (split-alts-aux (kleene-node-child node)) (kleene-node-greedy-p node))) (t node))) (defun simplify-once (node) (cond ((seq-node-p node) (let ((newchildren (combine-sequence-text (mapcar #'simplify-once (seq-node-children node))))) (cond ((= (length newchildren) 1) (first newchildren)) (t (make-seq-node-list newchildren))))) ((alt-node-p node) (let ((newalts (combine-alt-charclass (mapcar #'simplify-once (alt-node-children node))))) (cond ((= (length newalts) 1) (first newalts)) (t (make-alt-node-list newalts))))) ((casealt-node-p node) (make-casealt-node-list (mapcar #'(lambda (arm) (list (first arm) (simplify-once (second arm)))) (casealt-node-children node)))) ((kleene-node-p node) (make-kleene-node (simplify-once (kleene-node-child node)) (kleene-node-greedy-p node))) (t node))) ; expand out a range (defun expand-range (greedyp lowbound highbound node) (cond ((and *registers-match-rightmost* (numberp highbound)) (let* ((reqd (loop for i from 0 below lowbound collect node)) (opt (loop with tmp = nil for i from lowbound below highbound when (= i lowbound) do (setq tmp (make-optional-node node greedyp)) when (> i lowbound) do (let ((seq (make-seq-node-args node tmp))) (setq tmp (cond (greedyp (make-alt-node-args seq nil)) (t (make-alt-node-args nil seq))))) finally (return tmp)))) (make-seq-node-list `(,@reqd ,opt)))) ((and *registers-match-rightmost* (null highbound)) (let* ((reqd (loop for i from 0 below lowbound collect node))) (make-seq-node-list `(,@reqd ,(make-kleene-node node greedyp))))) ((and (not *registers-match-rightmost*) (numberp highbound)) (let* ((registerless-node (unregister node)) (reqd (loop for i from 0 below lowbound when (zerop i) collect node when (> i 0) collect registerless-node)) (opt (loop with tmp = nil for i from lowbound below highbound when (= i lowbound) do (setq tmp (make-optional-node (if (zerop lowbound) node registerless-node) greedyp)) when (> i lowbound) do (let ((seq (make-seq-node-args registerless-node tmp))) (setq tmp (cond (greedyp (make-alt-node-args seq nil)) (t (make-alt-node-args nil seq))))) finally (return tmp)))) (make-seq-node-list `(,@reqd ,opt)))) (t (let* ((registerless-node (unregister node)) (reqd (loop for i from 0 below lowbound when (zerop i) collect node when (> i 0) collect registerless-node))) (cond ((zerop lowbound) (make-kleene-node node greedyp)) (t (make-seq-node-list `(,@reqd ,(make-kleene-node registerless-node greedyp))))))))) (defun coercetostring (x) (cond ((stringp x) x) ((characterp x) (string x)))) ; unnest sequences where possible (defun flatten-sequence (nodes) (cond ((null nodes) nil) ((seq-node-p (first nodes)) (flatten-sequence (append (seq-node-children (first nodes)) (rest nodes)))) (t (cons (first nodes) (flatten-sequence (rest nodes)))))) ; combine runs of chars and strings into strings ; combine runs of character classes into charclass-sequence's (defun combine-sequence-text (nodes) (cond ((null nodes) nil) ((string-seq-p nodes) (multiple-value-bind (str restseq) (partition-string-sequence nodes) (cons (make-string-node str) (combine-sequence-text restseq)))) ((char-class-seq-p nodes) (multiple-value-bind (classseq restseq) (partition-charclass-sequence nodes) (cons (make-classseq-node classseq) (combine-sequence-text restseq)))) (t (cons (first nodes) (combine-sequence-text (rest nodes)))))) ; combine multiple character classes in an ALT into one character class (defun combine-alt-charclass (nodes) (cond ((>= (count-if #'char-or-class-node-p nodes) 2) (multiple-value-bind (chars othernodes) (partition-charclass-alt nodes) (cons (make-charclass-node chars) othernodes))) (t nodes))) ;; does this sequence start out with a run of chars? (defun string-seq-p (seq) (loop for item in seq for i from 0 while (text-node-p item) when (>= i 1) return t) ) ;; does this sequence start out with a run of char-class? (defun char-class-seq-p (seq) (loop for item in seq for i from 0 while (positive-charclass-node-p item) when (>= i 1) return t) ) ;; partitions sequences into a string representing the leading run, ;; and the rest of the sequence (defun partition-string-sequence (seq) (loop with strseq = nil for item = (pop seq) when (text-node-p item) do (push item strseq) when (not (text-node-p item)) return (values (append-strings (mapcar #'coercetostring (reverse strseq))) (cond ((and (null item) (null seq)) nil) ((null seq) (list item)) (t (cons item seq)))))) (defun append-strings (strings) (cond ((null strings) "") (t (loop for str in strings for result = str then (concatenate 'simple-string result str) finally (return result))))) (defun partition-charclass-sequence (seq) (loop with classseq = nil for item = (pop seq) when (positive-charclass-node-p item) do (push item classseq) when (not (charclass-node-p item)) return (values (mapcar #'classseq-node-seq (reverse classseq)) (cond ((and (null item) (null seq)) nil) ((null seq) (list item)) (t (cons item seq)))))) (defun partition-charclass-alt (nodes) (loop with chars = "" for node = (pop nodes) when (charclass-node-p node) do (setq chars (concatenate 'string chars (charclass-node-chars node))) when (char-node-p node) do (setq chars (concatenate 'string chars (string (char-node-char node)))) when (not (char-or-class-node-p node)) return (values chars (cond ((and (null node) (null nodes)) nil) ((null nodes) (list node)) (t (cons node nodes)))))) (defun flatten-alt (nodes) (cond ((null nodes) nil) ((alt-node-p (first nodes)) (flatten-alt (append (alt-node-children (first nodes)) (rest nodes)))) (t (cons (first nodes) (flatten-alt (rest nodes)))))) (defun hoist-alt-ends (nodes) (multiple-value-bind (prefix restnodes) (hoist-alt-prefix nodes) (multiple-value-bind (altbody suffix) (hoist-alt-suffix restnodes) (values prefix altbody (reverse suffix))))) (defun seq-first (node) (cond ((seq-node-p node) (first (seq-node-children node))) (t node))) (defun seq-first-char (node) (cond ((seq-node-p node) (let ((children (seq-node-children node))) (loop for child in children when (or (alt-node-p child) (kleene-node-p child) (specclass-node-p child) (hook-node-p child) (backmatch-node-p child)) return nil when (or (charclass-node-p child) (char-node-p child)) return child))) (t nil))) (defun seq-rest (node) (cond ((seq-node-p node) (make-seq-node-list (rest (seq-node-children node)))) (t nil))) ;; unlike CL's last, this returns the last car, not the last cons cell (defun seq-last (node) (cond ((seq-node-p node) (car (last (seq-node-children node)))) (t node))) (defun seq-butlast (node) (cond ((seq-node-p node) (make-seq-node-list (butlast (seq-node-children node)))) (t nil))) (defun hoist-alt-prefix (nodes) (let ((prefixes (mapcar #'seq-first nodes)) (rests (mapcar #'seq-rest nodes))) (cond ((or (null prefixes) (some #'null prefixes)) (values nil nodes)) ((every #'equal prefixes (rest prefixes)) (cond ((or (null rests) (every #'null rests)) (values (list (first prefixes)) nil)) ((or (null rests) (some #'null rests)) (values (list (first prefixes)) rests)) (t (multiple-value-bind (other-prefixes altnodes) (hoist-alt-prefix rests) (values (cons (first prefixes) other-prefixes) altnodes))))) (t (values nil nodes))))) (defun hoist-alt-suffix (nodes) (let ((suffixes (mapcar #'seq-last nodes)) (butlasts (mapcar #'seq-butlast nodes))) (cond ((or (null suffixes) (some #'null suffixes)) (values nodes nil)) ((every #'equal suffixes (rest suffixes)) (cond ((or (null butlasts) (some #'null butlasts)) (values butlasts (list (first suffixes)))) (t (multiple-value-bind (altnodes other-suffixes) (hoist-alt-suffix butlasts) (values altnodes (cons (first suffixes) other-suffixes)))))) (t (values nodes nil))))) ; (alt "aaa" "bbb" "bbc" "ccc" "ccd" (* #\e)) ; --> ; ((* #\e)) ; ((#\a "aaa") ; (#\b "bbb" "bbc") ; (#\c "ccc" "ccd")) ;; Now handles character classes as well (defun partition-on-leading-char (children) (let* ((leading-char-alist (make-discriminant-char-alist children)) (children-without-leading-char (mapcar #'second (remove-if #'first leading-char-alist))) (children-with-leading-char (remove-if-not #'first leading-char-alist)) (sorted-lc-children (sort children-with-leading-char #'sort-lc-pred :key #'first))) (loop with partition = nil for (prev-leading-char prev-childnode) in sorted-lc-children for (leading-char childnode) in (rest sorted-lc-children) for current-set = (list prev-childnode) then current-set when (not (equalp prev-leading-char leading-char)) do (progn (push (cons prev-leading-char (reverse current-set)) partition) (setq current-set (list childnode))) when (equalp prev-leading-char leading-char) do (push childnode current-set) finally (return (values children-without-leading-char (reverse (cons (cons leading-char (reverse current-set)) partition))))))) (defun sort-lc-pred (a b) (cond ((and (characterp a) (characterp b)) (char< a b)) ((and (characterp a) (stringp b)) (char< a (char b 0))) ((and (stringp a) (characterp b)) (char< (char a 0) b)))) (defun make-discriminant-char-alist (nodes &aux alist) (dolist (node nodes alist) (setq alist (nconc alist (get-discriminant-chars-alist node))))) ;; if first in sequence is a positive charclass, return all chars as the discriminant (defun get-discriminant-chars-alist (node) (let ((first (seq-first-char node))) (cond ((char-node-p first) (list (list (char-node-char first) node))) ((and (charclass-node-p first) (not (charclass-node-negated-p first))) (loop for x across (charclass-node-chars first) collect (list x node))) (t (list (list nil node)))))) (defun worth-alt-case-partitioning-p (num-children num-unknown-char-sets num-known-char-sets) "Is it worth partitioning an alt into a case-alt?" (and ;; Partition must split out at least 4 alternatives (>= (- num-children num-unknown-char-sets) 4) ;; Must be more than one set in the alt-case (the case of 1 should ;; be handled separately) (> num-known-char-sets 1))) (defun subalt-if-necessary (casealt-arm) (let ((len (length (rest casealt-arm)))) (cond ((zerop len) (error "sub-alt must have at least one clause ~S" casealt-arm)) ((= len 1) `(,(first casealt-arm) ,(make-seq-node-args (second casealt-arm)))) (t `(,(first casealt-arm) ,(make-alt-node-list (rest casealt-arm))))))) (defun obviously-nullable-pattern (tree) (and (listp tree) (or (not (null (member (first tree) '(* *?)))) (and (eq (first tree) 'alt) (<= (length tree) 2))))) (defun contains-looping-pattern-p (tree) (tree-any #'(lambda (x) (member x '(* + *? +?) :test #'eq)) tree)) (defun contains-registers-p (tree) (tree-any #'(lambda (x) (member x '(reg regstart regend))) tree)) (defun tree-any (fxn tree) (labels ((tree-any-aux (tree) (cond ((null tree) nil) ((atom tree) (funcall fxn tree)) (t (or (funcall fxn tree) (safer-some #'tree-any-aux tree)))))) (tree-any-aux tree))) ;; Similar to CL's SOME, but doesn't barf on improper lists. (defun safer-some (fxn lst) (loop while (consp lst) for x = (pop lst) when (funcall fxn x) return t finally (return (cond ((null lst) nil) (t (funcall fxn lst)))))) (defun unregister (node) (cond ((null node) nil) ((char-node-p node) node) ((string-node-p node) node) ((classseq-node-p node) node) ((backmatch-node-p node) node) ((seq-node-p node) (make-seq-node-list (remove-if #'null (mapcar #'unregister (seq-node-children node))))) ((kleene-node-p node) (make-kleene-node (unregister (kleene-node-child node)) (kleene-node-greedy-p node))) ((pkleene-node-p node) (make-pkleene-node (unregister (pkleene-node-child node)) (pkleene-node-greedy-p node))) ((optional-node-p node) (make-optional-node (unregister (optional-node-child node)) (optional-node-greedy-p node))) ((range-node-p node) (make-range-node (unregister (range-node-child node)) (range-node-min node) (range-node-max node) (range-node-greedy-p node))) ((alt-node-p node) ; don't descend into alt nodes -- we may match one branch one iter, and another ; branch the next. ;(make-alt-node-list (mapcar #'unregister (alt-node-children node)))) node) ((start-anchor-node-p node) node) ((end-anchor-node-p node) node) ((register-node-p node) (unregister (register-node-child node))) ((regstart-node-p node) nil) ((regend-node-p node) nil) ((charclass-node-p node) (make-charclass-node (charclass-node-chars node) :negated (charclass-node-negated-p node))) ((specclass-node-p node) (make-specclass-node (specclass-node-class node) :negated (specclass-node-negated-p node))) ((any-node-p node) node) ((hook-node-p node) node) (t ;; once we're done, this should throw the :invalid-parse-tree tag (throw 'regex-parse-error (list "unregister: Unrecognized regex parse tree node ~S" node)))) ) ;;; ;;; Pass 4 - Instruction selection ;;; ; replace *, +, etc with the actual instructions to be used... (defun select-instructions (node) (cond ((null node) nil) ((char-node-p node) (select-char-instr (char-node-char node))) ((string-node-p node) (select-string-instr (string-node-string node))) ((classseq-node-p node) (select-classseq-instr (classseq-node-seq node))) ((backmatch-node-p node) node) ((seq-node-p node) (select-sequence-instrs (seq-node-children node))) ((kleene-node-p node) (cond ((kleene-node-greedy-p node) (select-greedy-kleene-instr (kleene-node-child node))) (t (select-nongreedy-kleene-instr (kleene-node-child node))))) ;; pkleene-nodes have been removed by the simplification process ((optional-node-p node) (cond ((optional-node-greedy-p node) (select-greedy-optional-instr (optional-node-child node))) (t (select-nongreedy-optional-instr (optional-node-child node))))) ;; range-nodes have been removed by simplification process ((alt-node-p node) (select-alt-instrs (alt-node-children node))) ((casealt-node-p node) (select-casealt-instr (casealt-node-children node))) ((start-anchor-node-p node) node) ((end-anchor-node-p node) node) ;; register-nodes have been removed by simplification process ((regstart-node-p node) (let ((regnum (regstart-node-regnum node))) (cond (*registers-match-rightmost* `(right-rstart ,regnum)) (t `(left-rstart ,regnum))))) ((regend-node-p node) node) ((charclass-node-p node) (let ((chars (charclass-node-chars node))) (cond ((not (charclass-node-negated-p node)) (select-charclass-instr chars)) (t (select-negated-charclass-instr chars))))) ((specclass-node-p node) (cond ((not (specclass-node-negated-p node)) (select-specclass-instr (specclass-node-class node))) (t (select-negated-specclass-instr (specclass-node-class node))))) ((any-node-p node) node) ((hook-node-p node) node) ((success-node-p node) node) (t ;; once we're done, this should throw the :invalid-parse-tree tag (throw 'regex-parse-error (list "compile-state-machine: Unhandled regex parse tree node ~S" node))))) (defun select-char-instr (chr) `(char ,chr)) (defun select-string-instr (str) ;; the pattern string must always be a simple-string (let ((simple-pat-str (coerce str 'simple-string))) `(string ,simple-pat-str))) (defun select-classseq-instr (classseq) `(classseq ,classseq)) (defun select-sequence-instrs (children) (make-seq-node-list (mapcar #'select-instructions children))) ;; optimize (alt-2 ) ;; optimize (alt-2 ) ;; optimize (alt-2 ) ;; optimize (alt-2 ) (defun select-alt-instrs (children) (make-alt-node-list (mapcar #'select-instructions children))) (defun select-casealt-instr (children) (make-casealt-node-list (mapcar #'(lambda (arm) (list (first arm) (select-instructions (second arm)))) children))) ;; By this point, we have already been unrolled to move registers out ;; of loops, so we can just worry about the special cases. (defun select-greedy-kleene-instr (child &aux (nullpat (nullable-pattern-p child)) (looppat (contains-looping-pattern-p child))) (cond ((char-node-p child) `(char-greedy-kleene ,(char-node-char child))) ((string-node-p child) ;; the pattern string must always be a simple-string (let ((simple-pat-str (coerce (string-node-string child) 'simple-string))) `(str-greedy-kleene ,simple-pat-str))) ((charclass-node-p child) (let* ((negp (charclass-node-negated-p child)) (chars (charclass-node-chars child)) (ccsize (length chars))) (case ccsize (0 '(any-greedy-kleene)) (1 (cond ((not negp) `(char-greedy-kleene ,(char chars 0))) (negp `(not-char-greedy-kleene ,(char chars 0))))) (2 (cond ((not negp) `(cclass-2-greedy-kleene ,(char chars 0) ,(char chars 1))) (negp `(not-cclass-2-greedy-kleene ,(char chars 0) ,(char chars 1))))) (t (let ((schars (coerce chars 'simple-string))) (cond ((not negp) `(cclass-greedy-kleene ,schars)) (t `(not-cclass-greedy-kleene ,schars)))))))) ((specclass-node-p child) (let* ((negp (specclass-node-negated-p child)) (specclass (specclass-node-class child))) (cond ((not negp) `(specclass-greedy-kleene ,specclass)) (negp `(not-specclass-greedy-kleene ,specclass))))) ((not nullpat) `(greedy-kleene-no-termcheck ,(select-instructions child))) ((or (and (not nullpat) looppat) (and nullpat (not looppat))) `(greedy-kleene-simple-termcheck ,(select-instructions child))) (t `(greedy-kleene-full-termcheck ,(select-instructions child)))) ) (defun select-nongreedy-kleene-instr (child) (let ((nullpat (nullable-pattern-p child)) (looppat (contains-looping-pattern-p child))) (cond ((not nullpat) `(ngkleene-no-termcheck ,(select-instructions child))) ((or (and (not nullpat) looppat) (and nullpat (not looppat))) `(ngkleene-simple-termcheck ,(select-instructions child))) (t `(ngkleene-full-termcheck ,(select-instructions child))))) ) (defun select-greedy-optional-instr (child) (make-alt-node-args (select-instructions child) nil)) (defun select-nongreedy-optional-instr (child) (make-alt-node-args nil (select-instructions child))) (defun select-charclass-instr (chars) (let ((ccsize (length chars))) (case ccsize (1 (select-char-instr (char chars 0))) (2 `(cclass-2 ,(char chars 0) ,(char chars 1))) (t (let ((simple-chars (coerce chars 'simple-string))) `(cclass ,simple-chars)))))) (defun select-negated-charclass-instr (chars) (let ((ccsize (length chars))) (case ccsize (1 `(not-char ,(char chars 0))) (2 `(not-cclass-2 ,(char chars 0) ,(char chars 1))) (t (let ((simple-chars (coerce chars 'simple-string))) `(not-cclass ,simple-chars)))))) (defun select-specclass-instr (class) `(specclass ,class)) (defun select-negated-specclass-instr (class) `(not-specclass ,class) ) (defun nullable-pattern-p (node) (cond ((null node) t) ((char-node-p node) nil) ((string-node-p node) nil) ((classseq-node-p node) nil) ((backmatch-node-p node) t) ((seq-node-p node) (every #'nullable-pattern-p (seq-node-children node))) ((kleene-node-p node) t) ((pkleene-node-p node) (nullable-pattern-p (pkleene-node-child node))) ((optional-node-p node) t) ((range-node-p node) (zerop (range-node-min node))) ((alt-node-p node) (some #'nullable-pattern-p (alt-node-children node))) ((start-anchor-node-p node) t) ((end-anchor-node-p node) t) ((register-node-p node) (nullable-pattern-p (register-node-child node))) ((regstart-node-p node) t) ((regend-node-p node) t) ((charclass-node-p node) nil) ((specclass-node-p node) nil) ((any-node-p node) nil) ((hook-node-p node) t) (t ;; once we're done, this should throw the :invalid-parse-tree tag (throw 'regex-parse-error (list "nullable-pattern-p: Unrecognized regex parse tree node ~S" node)))) )