;; *****************************************************************************
;; FILE
;; Name          : regexp-test-suite.cl
;; Date          : 2002-03-01
;; Author        : Sébastien SAINT-SEVIN
;; Purpose       : testing module for regular expressions
;;
;; Modified by KMP to run under both rightmost and leftmost matches.
;; -----------------------------------------------------------------------------

(in-package :REGEX-TEST)

(allow-nonregister-groups)
(allow-nongreedy-quantifiers)
(allow-rangematch)
(allow-backmatch)

;; rightmost matches are more canonical, but much slower.
;(registers-match-rightmost t)
;(pushnew :regex-right *features*)

;; leftmost matches are usually a lot faster with this engine.
;; The #+:regex-right / #-:regex-right entries in the table below are chosen
;; by the reader according to whether :regex-right is present in *features*.
(registers-match-rightmost nil)
(setq *features* (remove :regex-right *features*))

(defparameter *regexp-tests*
  '(
    ;; *********************************************************************
    ;; TEST    ::= (PATTERN STRING COMPILE-P MATCHED-P RESULTS)
    ;; RESULTS ::= (GLOBAL-MATCH GROUP-MATCH*)
    ;;
    ;; COMPILE-P : true when PATTERN is expected to compile
    ;; MATCHED-P : true when PATTERN is expected to match STRING
    ;; RESULTS   : expected text of register 0 (the whole match) followed by
    ;;             the expected text of each group; "" for an unmatched group
    ;; ---------------------------------------------------------------------

    ;; *******************************************************
    ;; the tests that follow are from:
    ;; -------------------------------------------------------
    ;; (c) Sudhir Shenoy, 1996
    ;;
    ;; The tests here are from:
    ;;
    ;; (a) Tom Lord's GNU rx package
    ;; (b) from the Zebu parser generator package
    ;;     (modified to use new syntax)
    ;; -------------------------------------------------------
    ;; All have been slightly modified to follow the syntax
    ;; I use in this module - Sébastien Saint-Sevin, 2002
    ;; -------------------------------------------------------

    ("a*a*" "aaaaaa" t t ("aaaaaa"))
    ("a*a*a*" "aaaaaa" t t ("aaaaaa"))
    ("a*a*a*a*" "aaaaaa" t t ("aaaaaa"))
    ("a*a*a*a*a*" "aaaaaa" t t ("aaaaaa"))
    ("a*a*a*a*a*a*" "aaaaaa" t t ("aaaaaa"))
    ("a*a*a*a*a*a*a*" "aaaaaa" t t ("aaaaaa"))
    ("" "" nil nil ())
    ("b{0,6}" "" t t (""))
    ("ab{0,0}c" "abc" t nil ())
    ("ab{1,1}c" "abbc" t nil ())
    ("ab{3,7}c" "abbbbbbbbc" t nil ())
    ("ab{3,7}c" "abbbbbbbbbc" t nil ())
    ("ab{3,7}c" "abbbbbbbbbbc" t nil ())
    ("ab{3,7}c" "abbbbbbbbbbbc" t nil ())
    ("b{2,7}" "bb" t t ("bb"))
    ("b{1,6}" "" t nil ())
    ("b{1,6}" "b" t t ("b"))
    ("b{2,7}" "b" t nil ())
    ("ab{0,7}c" "ac" t t ("ac"))
    ("ab{1,7}c" "abc" t t ("abc"))
    ("ab{2,7}c" "abbc" t t ("abbc"))
    ("ab{3,7}c" "abbbc" t t ("abbbc"))
    ("ab{3,7}c" "abbbbc" t t ("abbbbc"))
    ("ab{3,7}c" "abbbbbc" t t ("abbbbbc"))
    ("ab{3,7}c" "abbbbbbc" t t ("abbbbbbc"))
    ("ab{3,7}c" "abbbbbbbc" t t ("abbbbbbbc"))
    ("ab{3,7}c" "abbbbbbbbc" t nil ())
    ("ab{3,7}c" "abbc" t nil ())
    ("ab{3,7}c" "abc" t nil ())
    ("(a|b)*c|(a|ab)*c" "xc" t t ("c" "" ""))
    ("(a)*" "b" t t ("" ""))
    ("(..)*(...)*" "a" t t ("" "" ""))
    ;;the following fails because sshenoy's engine is a posix NFA
    ;;("(..)*(...)*" "abc" t t ("abc" "" "abc"))
    ("(..)*(...)*" "abc" t t ("ab" "ab" ""))
    ("^" "" t t (""))
    ("$" "" t t (""))
    ("^$" "" t t (""))
    ("^a$" "a" t t ("a"))
    ("abc" "abc" t t ("abc"))
    ("abc" "xbc" t nil ())
    ("abc" "axc" t nil ())
    ("abc" "abx" t nil ())
    ("abc" "xabcy" t t ("abc"))
    ("abc" "ababc" t t ("abc"))
    ("ab*c" "abc" t t ("abc"))
    ("ab*bc" "abc" t t ("abc"))
    ("ab*bc" "abbc" t t ("abbc"))
    ("ab*bc" "abbbbc" t t ("abbbbc"))
    ("ab+bc" "abbc" t t ("abbc"))
    ("ab+bc" "abc" t nil ())
    ("ab+bc" "abq" t nil ())
    ("ab+bc" "abbbbc" t t ("abbbbc"))
    ("ab?bc" "abbc" t t ("abbc"))
    ("ab?bc" "abc" t t ("abc"))
    ("ab?bc" "abbbbc" t nil ())
    ("ab?c" "abc" t t ("abc"))
    ("^abc$" "abc" t t ("abc"))
    ("^abc$" "abcc" t nil ())
    ("^abc" "abcc" t t ("abc"))
    ("^abc$" "aabc" t nil ())
    ("abc$" "aabc" t t ("abc"))
    ("^" "abc" t t (""))
    ("$" "abc" t t (""))
    ("a.c" "abc" t t ("abc"))
    ("a.c" "axc" t t ("axc"))
    ("a.*c" "axyzc" t t ("axyzc"))
    ("a.*c" "axyzd" t nil ())
    ("a[bc]d" "abc" t nil ())
    ("a[bc]d" "abd" t t ("abd"))
    ("a[b-d]e" "abd" t nil ())
    ("a[b-d]e" "ace" t t ("ace"))
    ("a[b-d]" "aac" t t ("ac"))
    ("a[-b]" "a-" t t ("a-"))
    ("a[b-]" "a-" t t ("a-"))
    ;;*** following is supposed to compile but what should it match ?
    ;;*** I don't know and that is why I reject the pattern.
    ;("a[b-a]" "-" t NIL NIL)
    ;("a[]b" "-" NIL NIL NIL)
    ;("a[" "-" NIL NIL NIL)
    ;("a]" "a]" t "a]" NIL)
    ;("a[]]b" "a]b" t "a]b" NIL)
    ("a[^bc]d" "aed" t t ("aed"))
    ("a[^bc]d" "abd" t nil ())
    ("a[^-b]c" "adc" t t ("adc"))
    ("a[^-b]c" "a-c" t nil ())
    ("a[^\\]b]c" "a]c" t nil ())
    ("a[^\\]b]c" "adc" t t ("adc"))
    ("ab|cd" "abc" t t ("ab"))
    ("ab|cd" "abcd" t t ("ab"))
    ;; NOTE(review): each ";;FAILED" marker is assumed to disable only the
    ;; single entry that follows it on the same line -- confirm.
    ;;FAILED ("()ef" "def" t t ("ef" ""))
    ;;FAILED ("()*" "-" t t ("" ""))
    ;;FAILED ("*a" "-" t t (""))
    ("^*" "-" t t (""))
    ("$*" "-" t t (""))
    ;;FAILED ("(*)b" "-" t t ("" ""))
    ("$b" "b" t nil ())
    ("a\\(b" "a(b" t t ("a(b"))
    ("a\\(*b" "ab" t t ("ab"))
    ("a\\(*b" "a((b" t t ("a((b"))
    ("a\\\\b" "a\\b" t t ("a\\b"))
    ("(abc" "-" nil nil ())
    ("((a))" "abc" t t ("a" "a" "a"))
    ("(a)b(c)" "abc" t t ("abc" "a" "c"))
    ("a+b+c" "aabbabc" t t ("abc"))
    ("a**" "-" t t (""))
    ("a*?" "-" t t (""))
    ("(a*)*" "-" t t ("" ""))
    ("(a*)+" "-" t t ("" ""))
    ("(a|)*" "-" t t ("" ""))
    ("(a*|b)*" "-" t t ("" ""))
    #+:regex-right("(a+|b)*" "ab" t t ("ab" "b"))
    #-:regex-right("(a+|b)*" "ab" t t ("ab" "a"))
    #+:regex-right("(a+|b)+" "ab" t t ("ab" "b"))
    #-:regex-right("(a+|b)+" "ab" t t ("ab" "a"))
    ("(a+|b)?" "ab" t t ("a" "a"))
    ("[^ab]*" "cde" t t ("cde"))
    ("(^)*" "-" t t ("" ""))
    ("(ab|)*" "-" t t ("" ""))
    (")(" "-" nil nil ())
    ("" "abc" nil nil ())
    ("abc" "" t nil ())
    ("a*" "" t t (""))
    #+:regex-right("([abc])*d" "abbbcd" t t ("abbbcd" "c"))
    #-:regex-right("([abc])*d" "abbbcd" t t ("abbbcd" "a"))
    ("([abc])*bcd" "abcd" t t ("abcd" "a"))
    ("a|b|c|d|e" "e" t t ("e"))
    ("(a|b|c|d|e)f" "ef" t t ("ef" "e"))
    ("((a*|b))*" "-" t t ("" "" ""))
    ("abcd*efg" "abcdefg" t t ("abcdefg"))
    ("ab*" "xabyabbbz" t t ("ab"))
    ("ab*" "xayabbbz" t t ("a"))
    ("(ab|cd)e" "abcde" t t ("cde" "cd"))
    ("[abhgefdc]ij" "hij" t t ("hij"))
    ("^(ab|cd)e" "abcde" t nil ())
    ("(abc|)ef" "abcdef" t t ("ef" ""))
    ("(a|b)c*d" "abcd" t t ("bcd" "b"))
    ("(ab|ab*)bc" "abc" t t ("abc" "a"))
    ("a([bc]*)c*" "abc" t t ("abc" "bc"))
    ("a([bc]*)(c*d)" "abcd" t t ("abcd" "bc" "d"))
    ("a([bc]+)(c*d)" "abcd" t t ("abcd" "bc" "d"))
    ("a([bc]*)(c+d)" "abcd" t t ("abcd" "b" "cd"))
    ("a[bcd]*dcdcde" "adcdcde" t t ("adcdcde"))
    ("a[bcd]+dcdcde" "adcdcde" t nil ())
    ("(ab|a)b*c" "abc" t t ("abc" "ab"))
    ("((a)(b)c)(d)" "abcd" t t ("abcd" "abc" "a" "b" "d"))
    ("[a-zA-Z_][a-zA-Z0-9_]*" "alpha" t t ("alpha"))
    ("^a(bc+|b[eh])g|.h$" "abh" t t ("bh" ""))
    ("(bc+d$|ef*g.|h?i(j|k))" "effgz" t t ("effgz" "effgz" ""))
    ("(bc+d$|ef*g.|h?i(j|k))" "ij" t t ("ij" "ij" "j"))
    ("(bc+d$|ef*g.|h?i(j|k))" "effg" t nil ())
    ("(bc+d$|ef*g.|h?i(j|k))" "bcdd" t nil ())
    ("(bc+d$|ef*g.|h?i(j|k))" "reffgz" t t ("effgz" "effgz" ""))
    ("((((((((((a))))))))))" "a" t t ("a" "a" "a" "a" "a" "a" "a" "a" "a" "a" "a"))
    ("(((((((((a)))))))))" "a" t t ("a" "a" "a" "a" "a" "a" "a" "a" "a" "a"))
    ("multiple words of text" "uh-uh" t nil ())
    ("multiple words" "multiple words, yeah" t t ("multiple words"))
    ("(.*)c(.*)" "abcde" t t ("abcde" "ab" "de"))
    ("\\((.*), (.*)\\)" "(a, b)" t t ("(a, b)" "a" "b"))
    ("[k]" "ab" t nil ())
    ("abcd" "abcd" t t ("abcd"))
    ("a(bc)d" "abcd" t t ("abcd" "bc"))
    ("a[-]?c" "ac" t t ("ac"))
    ("a[-]?c" "ac" t t ("ac"))
    ("a[-]?c" "ac" t t ("ac"))
    ("[ -~]*" "abc" t t ("abc"))
    ("[ -~ -~]*" "abc" t t ("abc"))
    ("[ -~ -~ -~]*" "abc" t t ("abc"))
    ("[ -~ -~ -~ -~]*" "abc" t t ("abc"))
    ("[ -~ -~ -~ -~ -~]*" "abc" t t ("abc"))
    ("[ -~ -~ -~ -~ -~ -~]*" "abc" t t ("abc"))
    ("[ -~ -~ -~ -~ -~ -~ -~]*" "abc" t t ("abc"))

    ;;
    ;; Tests from the Zebu package (originally for nregex.lisp)
    ;;
    ("(na)x+" "naxna" t t ("nax" "na"))
    ("(na)x+na" "naxna123" t t ("naxna" "na"))
    ("(na)x+" "naxxos" t t ("naxx" "na"))
    ("(na)x+" "naxos" t t ("nax" "na"))
    ("(na)x+" "naos" t nil ())
    ("(na)x*" "naxxos" t t ("naxx" "na"))
    ("(na)x*" "naxos" t t ("nax" "na"))
    ("(na)x*" "naos" t t ("na" "na"))
    ("[0-9]+" "123ab" t t ("123"))
    ("[a-zA-Z]+" "aAbb123" t t ("aAbb"))
    ("[0-9a-z]+" "1234&&*" t t ("1234"))
    ("[0-9a-z]+" "1234a&&*" t t ("1234a"))
    ("[0-9a-zA-Z]+" "a1234a" t t ("a1234a"))
    ("[0-9a-zA-Z&]+" "aAbb123&&*" t t ("aAbb123&&"))
    ("[0-9]+\\.[0-9]*" "0.123cm" t t ("0.123"))
    ; ("{[^}
    ;]*}" "{M.D. Harrison and A. Monk (Ed.)} \n\t foo: 2"
    ;T "{M.D. Harrison and A. Monk (Ed.)}" NIL)
    ; ("{[^}
    ;]*}" "{M.D. Harrison and
    ;A. Monk (Ed.)} \n\t foo: 2" t NIL NIL)
    ; ("{[^}
    ;]*}"
    ; "{M.D. Harrison and {A. Monk} (Ed.)} \n\t foo: 2"
    ; t "{M.D. Harrison and {A. Monk}" NIL)
    ("ca?r" "car" t t ("car"))
    ("ca?r" "cr" t t ("cr"))
    ("c[ad]+r" "caaar" t t ("caaar"))
    ("c[ad]+r" "caaar aa1" t t ("caaar"))
    ("c[ad]+r$" "caaar" t t ("caaar"))
    (".*" "" t t (""))
    (".*" "aa" t t ("aa"))
    ("c[ad]?r" "cr" t t ("cr"))
    ("c[ad]?r" "car" t t ("car"))
    ("c[ad]?r" "cdr" t t ("cdr"))
    ("c[0-9]?r" "cr" t t ("cr"))
    ("c[0-9]?r" "c9rxx" t t ("c9r"))
    ("c[0-9]?r" "crxx" t t ("cr"))
    ("a|b" "a" t t ("a"))
    ("ab.yz" "ab yz" t t ("ab yz"))
    ;("ab.yz" "ab
    ;yz" t t ("ab
    ;yz"))
    ("(abc){1,2}" "abcabc" t t ("abcabc" "abc"))
    ;("(abc){1,2}x*(def)y*def" "abcabcxxxxdefyyyyyyydef$%%%%%"
    ; t "abcabcxxxxdefyyyyyyydef" #("abc" "def"))
    ("a|bc*" "a" t t ("a"))
    ("[A-Z]+" "ABCY" t t ("ABCY"))
    ("[0-9]+\\.[0-9]*(e[+-]?[0-9]+)" "12.3e4 k" t t ("12.3e4" "e4"))
    ("[0-9]+\\.[0-9]*(e[+-]?[0-9]+)" "12.3e-4 k" t t ("12.3e-4" "e-4"))
    ("[0-9]+\\.[0-9]*(e[+-]?[0-9]+)?" "12.3 k" t t ("12.3" ""))

    ;;
    ;; The Gadaffi tests
    ;; Note that the first group matches NULL because it is always sucked
    ;; up by the preceding .* in case of a successful match.
    ;;
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Qaddafi" t t ("Muammar Qaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mo'ammar Gadhafi" t t ("Mo'ammar Gadhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Kaddafi" t t ("Muammar Kaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Qadhafi" t t ("Muammar Qadhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Moammar El Kadhafi" t t ("Moammar El Kadhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Gadafi" t t ("Muammar Gadafi" "" "d"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mu'ammar al-Qadafi" t t ("Mu'ammar al-Qadafi" "" "d"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Moamer El Kazzafi" t t ("Moamer El Kazzafi" "" "zz"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Moamar al-Gaddafi" t t ("Moamar al-Gaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mu'ammar Al Qathafi" t t ("Mu'ammar Al Qathafi" "" "th"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Al Qathafi" t t ("Muammar Al Qathafi" "" "th"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mo'ammar el-Gadhafi" t t ("Mo'ammar el-Gadhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Moamar El Kadhafi" t t ("Moamar El Kadhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar al-Qadhafi" t t ("Muammar al-Qadhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mu'ammar al-Qadhdhafi" t t ("Mu'ammar al-Qadhdhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mu'ammar Qadafi" t t ("Mu'ammar Qadafi" "" "d"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Moamar Gaddafi" t t ("Moamar Gaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mu'ammar Qadhdhafi" t t ("Mu'ammar Qadhdhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Khaddafi" t t ("Muammar Khaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar al-Khaddafi" t t ("Muammar al-Khaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mu'amar al-Kadafi" t t ("Mu'amar al-Kadafi" "" "d"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Ghaddafy" t t ("Muammar Ghaddafy" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Ghadafi" t t ("Muammar Ghadafi" "" "d"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Ghaddafi" t t ("Muammar Ghaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muamar Kaddafi" t t ("Muamar Kaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Quathafi" t t ("Muammar Quathafi" "" "th"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muammar Gheddafi" t t ("Muammar Gheddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Muamar Al-Kaddafi" t t ("Muamar Al-Kaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Moammar Khadafy " t t ("Moammar Khadafy" "" "d"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Moammar Qudhafi" t t ("Moammar Qudhafi" "" "dh"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mu'ammar al-Qaddafi" t t ("Mu'ammar al-Qaddafi" "" "dd"))
    ("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]" "Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi" t t ("Mu'ammar Muhammad Abu Minyar al-Qadhafi" "" "dh"))

    ;;
    ;; tests involving back-refs
    #+:regex-right("((a|b{10,11})|(b))*-\\2" "aaab-a" t t ("aaab-a" "b" "a" "b"))
    #-:regex-right("((a|b{10,11})|(b))*-\\2" "aaab-a" t t ("aaab-a" "a" "a" "b"))
    ("(a)*-\\1" "aaa-a" t t ("aaa-a" "a"))
    ("(a)*-\\1b" "aaa-b" t t ("-b" ""))
    ("([xyz])(-\\2)" "x-y" t nil ())
    ("(([xyz])(-\\2))" "x-y" t nil ())
    ("(([xyz])(-\\2)*)*" "x-y" t t ("x" "x" "x" ""))
    ("(([xyz])(-\\2)*)*" "x-" t t ("x" "x" "x" ""))
    #+:regex-right("(([xyz])(-\\2)*)*" "xy-yz-y" t t ("xy-yz" "z" "z" "-y"))
    ;; kmp haven't fixed this one yet...
    ;#-:regex-right("(([xyz])(-\\2)*)*" "xy-xz-x" t t ("xy-xz" "z" "z" "-y"))
    ;; kmp -- this *should* match
    ; ("((.*)\\1)+" "xxxxxx" t nil ())
    ; for rightmost register match:
    #+:regex-right("((.*)\\1)+" "xxxxxx" t t ("xxxxxx" "" ""))
    ; for leftmost register match:
    #-:regex-right("((.*)\\1)+" "xxxxxx" t t ("xxxxxx" "xxxxxx" "xxx"))
    ("(a*)\\1\\1(a*)\\2\\2\\2" "aaaaaa" t t ("aaaaaa" "aa" ""))
    ("(a*)(a*)\\1\\2" "aaaa" t t ("aaaa" "aa" ""))
    ("(a*)\\1(a*)\\2\\2" "aaaa" t t ("aaaa" "aa" ""))
    ("(a*)\\1\\1(a*)" "aaaaaa" t t ("aaaaaa" "aa" ""))
    ("(a*)\\1\\1(a*)\\2" "aaaaaa" t t ("aaaaaa" "aa" ""))
    ("(a*)\\1\\1(a*)\\2\\2" "aaaaaa" t t ("aaaaaa" "aa" ""))
    ("(.*)\\1\\1(.*)\\2\\2\\2" "aaaaaa" t t ("aaaaaa" "aa" ""))
    ;;the following fails because sshenoy's engine is a posix NFA
    ;("(.*)\\1\\1(.*)\\2\\2\\2" "aaaaaaa" t t ("aaaaaaa" "a" "a"))
    ("(.*)\\1\\1(.*)\\2\\2\\2" "aaaaaaa" t t ("aaaaaa" "aa" ""))
    ("(.*)\\1\\1(.*)\\2\\2\\2" "aaaaaa" t t ("aaaaaa" "aa" ""))
    ;;the following fails because sshenoy's engine is a posix NFA
    ;("(.*)\\1\\1(.*)\\2\\2\\2" "aaaaa" t t ("aaaa" "" "a"))
    ("(.*)\\1\\1(.*)\\2\\2\\2" "aaaaa" t t ("aaa" "a" ""))
    ("(.*)\\1\\1" "aaa" t t ("aaa" "a"))
    #+:regex-right("(.*)*\\1" "xx" t t ("xx" ""))
    #-:regex-right("(.*)*\\1" "xx" t t ("xx" "x"))
    ("(....).*\\1" "beriberi" t t ("beriberi" "beri"))

    ; ;;
    ; ;; Some tests for class matches (my own)
    ; ;;
    ; ("[[:alpha:]_][[:alnum:]_]*" "c_identifier" t "c_identifier" NIL)
    ; ("[[:xdigit:]]*" "12aBcD89" t "12aBcD89" NIL)
    ; ;; In the following pattern, because :] is missing, the pattern is
    ; ;; interpreted as an ordinary range
    ; ("[[:xdigit]+" "0[x:dig" t "[x:dig" NIL)))

    ;; *******************************************************
    ;; the tests that follow are from:
    ;; -------------------------------------------------------
    ;; Sébastien Saint-Sevin, 2002
    ;; -------------------------------------------------------

    ;; some basics
    ;; -----------
    (".*" "aa" t t ("aa"))
    (".+" "aa" t t ("aa"))

    ;; anchor
    ;; ------

    ;; alternate
    ;; ---------
    ("(hello|man|)" "" t t ("" ""))
    ("(a+|b)" "aaa" t t ("aaa" "aaa"))
    ("(a+|b)" "b" t t ("b" "b"))

    ;; character classes
    ;; -----------------
    ("[abc]{1,3}" "bcaa" t t ("bca"))
    ("a[\\-]?c" "ac" t t ("ac"))
    ("a[\\-]?c" "a-c" t t ("a-c"))
    ("a[-]?c" "ac" t t ("ac"))
    ("a[-]?c" "a-c" t t ("a-c"))
    ("a[-b]?c" "abc" t t ("abc"))
    ("a[b-]?c" "acc" t t ("ac"))
    ; "a[\\[]c"
    ; "a[\\^]c"
    ; "a[\\]]c"
    ; ("a[^\\-]?c" "ac" t t ("ac"))
    ; ("a[^\\-]?c" "a-c" t nil ())
    ; ("a[^-]?c" "ac" t t ("ac"))
    ; ("a[^-]?c" "a-c" t t ("a-c"))
    ; ("a[^-b]?c" "abc" t t ("abc"))
    ; ("a[^b-]?c" "acc" t t ("ac"))
    ; "a[^\\[]c"
    ; "a[^\\^]c"
    ; "a[^\\]]c"

    ;; posix character classes
    ;; -----------------------

    ;; greedy quantifiers
    ;; ------------------
    ("a*" "aaaa" t t ("aaaa"))
    ("a+" "aaaa" t t ("aaaa"))
    ("a{2,3}" "aaaa" t t ("aaa"))

    ;; nongreedy quantifiers
    ;; ---------------------
    ("a*?" "aaaa" t t (""))
    ("a+?" "aaaa" t t ("a"))
    ("a{2,3}?" "aaaa" t t ("aa"))
    ("a+?bb*?" "baaaabaaabbbaaaaa" t t ("aaaab"))
    ("a+?bb+?" "baaaabaaabbbaaaaa" t t ("aaabb"))
    ("[abc]{10,20}?" "xxxbcbcbabcaabcbabcbcbabcbcaabcabxxx" t t ("bcbcbabcaa"))

    ;; grouping
    ;; --------

    ;; nonregister grouping
    ;; --------------------
    ; "((?a+)|b)"

    ;; greedy quantifiers + backrefs
    ;; -----------------------------
    ("^(x)+$" "xx" t t ("xx" "x"))
    ("^(x)+\\1$" "xx" t t ("xx" "x"))
    ("^(x){1,2}$" "xx" t t ("xx" "x"))
    ("^(x){1,2}\\1$" "xx" t t ("xx" "x"))
    ("^(x)+[^x]+\\1$" "xxaax" t t ("xxaax" "x"))
    ("^x*(x)[^x]+\\1$" "xxaax" t t ("xxaax" "x"))
    ("(x)+\\1" "xxxx" t t ("xxxx" "x"))
    ("(x){1,2}" "xxxx" t t ("xx" "x"))
    ;; kmp By the letter, (x) can only match one character.  To get this
    ;; effect, the pattern should be "(x{1,2})\\1"
    ; ("(x){1,2}\\1" "xxxx" t t ("xxxx" "x"))
    ("(x){1,2}\\1" "xxxx" t t ("xxx" "x"))
    ("(x)+[^x]+\\1" "xxaax" t t ("xxaax" "x"))
    ("x*(x)[^x]+\\1" "xxaax" t t ("xxaax" "x"))

    ;; nongreedy quantifiers + backrefs
    ;; --------------------------------
    ("(x)+?\\1" "xxxx" t t ("xx" "x"))
    ("(x){1,2}?" "xxxx" t t ("x" "x"))
    ("(x){1,2}?\\1" "xxxx" t t ("xx" "x"))
    ("(x)+?[^x]+\\1" "xxaax" t t ("xxaax" "x"))
    ("x*?(x)[^x]+\\1" "xxaax" t t ("xxaax" "x"))

    ;; misc
    ;; ----
    ;; kmp it is legal for a* to match nothing
    ; ("(a*)*" "aaaa" t t ("aaaa" "aaaa"))
    #+:regex-right("(a*)*" "aaaa" t t ("aaaa" ""))
    #-:regex-right("(a*)*" "aaaa" t t ("aaaa" "aaaa"))
    ;; kmp it is legal for a* to match nothing
    ; ("(a*)+" "aaaa" t t ("aaaa" "aaaa"))
    #+:regex-right("(a*)+" "aaaa" t t ("aaaa" ""))
    #-:regex-right("(a*)+" "aaaa" t t ("aaaa" "aaaa"))
    ("(a+)*" "aaaa" t t ("aaaa" "aaaa"))
    ("(a+)*" "aaaa" t t ("aaaa" "aaaa"))
    ))


;; *****************************************************************************
;; FUNCTION
;; Name          : run-sebastien-tests
;; Date          : 2002-03-08
;; Author        : 3S
;; Arguments     : none
;; Side Effects  : print testing results
;; Purpose       : run every entry of *regexp-tests* and report each mismatch
;; -----------------------------------------------------------------------------
(defun run-sebastien-tests ()
  "Run every entry of *REGEXP-TESTS* and print a report for each one.

For each (PATTERN STR EXPECTED-COMPILE-P EXPECTED-MATCHED-P EXPECTED-RESULTS)
entry the pattern is compiled with COMPILE-STR and, when a matcher is
returned, scanned against STR with SCAN-STR.  Lines ending in
\"TEST FAILED\" are printed when the compile outcome, the match outcome, the
overall matched substring, the number of registers, or the text of an
individual register differs from the expectation.  A register that did not
participate in the match is compared as the empty string."
  (regex::clear-pattern-cache)  ;; kmp helps for when I'm debugging the compiler
  (print ";; *****************************************************************************")
  (print ";; BEGIN OF TEST")
  (print ";; -----------------------------------------------------------------------------")
  (dolist (test *regexp-tests*)
    (destructuring-bind (pattern str expected-compile-p expected-matched-p expected-results)
        test
      (format t "~%pattern: ~A ~%string: ~A" pattern str)
      (let ((matcher (compile-str pattern)))
        ;; Compile outcome versus expectation.
        (cond ((and matcher (not expected-compile-p))
               (format t "~%Shouldn't have compiled, but did ******************** TEST FAILED"))
              ((and (not matcher) expected-compile-p)
               (format t "~%Should have compiled, but didn't ******************** TEST FAILED")))
        ;; Matching is attempted whenever a matcher exists, even if the
        ;; pattern was not expected to compile.
        (when matcher
          (multiple-value-bind (matched-p start len regs)
              (scan-str matcher str)
            ;; Match outcome versus expectation.
            (cond ((and expected-matched-p (not matched-p))
                   (format t "~%Should have matched, but didn't ******************** TEST FAILED"))
                  ((and (not expected-matched-p) matched-p)
                   (format t "~%Shouldn't have matched, but did ******************** TEST FAILED")))
            (when matched-p
              ;; Overall match: first expected result against STR[start, start+len).
              (if (string= (car expected-results) (subseq str start (+ start len)))
                  (format t "~%Global match OK")
                  (format t "~%Global match ******************** TEST FAILED"))
              ;; Per-register comparison, only when the register count agrees
              ;; with the number of expected results.
              (let ((num-groups (array-dimension regs 0)))
                (if (/= (length expected-results) num-groups)
                    (format t "~%Number of groups ******************** TEST FAILED")
                    (dotimes (i num-groups)
                      (let* ((group-start (register-start regs i))
                             (group-end (register-end regs i))
                             (expected-value (nth i expected-results))
                             (calculated-value (if (register-matched-p regs i)
                                                   (subseq str group-start group-end)
                                                   "")))
                        (if (string= expected-value calculated-value)
                            (format t "~%Group ~A OK ==> ~A" i calculated-value)
                            (format t "~%Group ~A ==> ~A instead of ~A ******************** TEST FAILED"
                                    i calculated-value expected-value)))))))))))
    (terpri))
  (print ";; *****************************************************************************")
  (print ";; END OF TEST")
  (print ";; -----------------------------------------------------------------------------")
  )

;; *****************************************************************************
;; END OF FILE
;; -----------------------------------------------------------------------------