Coverage report: /home/jsnell/.sbcl/site/cl-ppcre-1.2.13/api.lisp
Kind | Covered | All | % |
expression | 368 | 1223 | 30.1 |
branch | 25 | 90 | 27.8 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
2
;;; $Header: /usr/local/cvsrep/cl-ppcre/api.lisp,v 1.61 2005/12/06 16:50:50 edi Exp $
4
;;; The external API for creating and using scanners.
6
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
8
;;; Redistribution and use in source and binary forms, with or without
9
;;; modification, are permitted provided that the following conditions
12
;;; * Redistributions of source code must retain the above copyright
13
;;; notice, this list of conditions and the following disclaimer.
15
;;; * Redistributions in binary form must reproduce the above
16
;;; copyright notice, this list of conditions and the following
17
;;; disclaimer in the documentation and/or other materials
18
;;; provided with the distribution.
20
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
21
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
24
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
26
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
(in-package #:cl-ppcre)
34
(defgeneric create-scanner (regex &key case-insensitive-mode
39
(:documentation "Accepts a regular expression - either as a
40
parse-tree or as a string - and returns a scan closure which will scan
41
strings for this regular expression. The \"mode\" keyboard arguments
42
are equivalent to the imsx modifiers in Perl. If DESTRUCTIVE is not
43
NIL the function is allowed to destructively modify its first argument
44
\(but only if it's a parse tree)."))
46
#-:use-acl-regexp2-engine
47
(defmethod create-scanner ((regex-string string) &key case-insensitive-mode
52
(declare #.*standard-optimize-settings*)
53
(declare (ignore destructive))
54
;; parse the string into a parse-tree and then call CREATE-SCANNER
56
(let* ((*extended-mode-p* extended-mode)
57
(quoted-regex-string (if *allow-quoting*
58
(quote-sections (clean-comments regex-string extended-mode))
60
(*syntax-error-string* (copy-seq quoted-regex-string)))
61
;; wrap the result with :GROUP to avoid infinite loops for
63
(create-scanner (cons :group (list (parse-string quoted-regex-string)))
64
:case-insensitive-mode case-insensitive-mode
65
:multi-line-mode multi-line-mode
66
:single-line-mode single-line-mode
69
#-:use-acl-regexp2-engine
70
(defmethod create-scanner ((scanner function) &key case-insensitive-mode
75
(declare #.*standard-optimize-settings*)
76
(declare (ignore destructive))
77
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
78
(signal-ppcre-invocation-error
79
"You can't use the keyword arguments to modify an existing scanner."))
82
#-:use-acl-regexp2-engine
83
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
88
(declare #.*standard-optimize-settings*)
90
(signal-ppcre-invocation-error
91
"Extended mode doesn't make sense in parse trees."))
92
;; convert parse-tree into internal representation REGEX and at the
93
;; same time compute the number of registers and the constant string
94
;; (or anchor) the regex starts with (if any)
96
(setq parse-tree (copy-tree parse-tree)))
99
(push :single-line-mode-p flags))
101
(push :multi-line-mode-p flags))
102
(if case-insensitive-mode
103
(push :case-insensitive-p flags))
105
(setq parse-tree (list :group (cons :flags flags) parse-tree))))
106
(let ((*syntax-error-string* nil))
107
(multiple-value-bind (regex reg-num starts-with)
109
;; simplify REGEX by flattening nested SEQ and ALTERNATION
110
;; constructs and gathering STR objects
111
(let ((regex (gather-strings (flatten regex))))
112
;; set the MIN-REST slots of the REPETITION objects
113
(compute-min-rest regex 0)
114
;; set the OFFSET slots of the STR objects
115
(compute-offsets regex 0)
116
(let* (end-string-offset
118
;; compute the constant string the regex ends with (if
119
;; any) and at the same time set the special variables
120
;; END-STRING-OFFSET and END-ANCHORED-P
121
(end-string (end-string regex))
122
;; if we found a non-zero-length end-string we create an
123
;; efficient search function for it
124
(end-string-test (and end-string
125
(plusp (len end-string))
126
(if (= 1 (len end-string))
127
(create-char-searcher
128
(schar (str end-string) 0)
129
(case-insensitive-p end-string))
132
(case-insensitive-p end-string)))))
133
;; initialize the counters for CREATE-MATCHER-AUX
135
(*zero-length-num* 0)
136
;; create the actual matcher function (which does all the
137
;; work of matching the regular expression) corresponding
138
;; to REGEX and at the same time set the special
139
;; variables *REP-NUM* and *ZERO-LENGTH-NUM*
140
(match-fn (create-matcher-aux regex #'identity))
141
;; if the regex starts with a string we create an
142
;; efficient search function for it
143
(start-string-test (and (typep starts-with 'str)
144
(plusp (len starts-with))
145
(if (= 1 (len starts-with))
146
(create-char-searcher
147
(schar (str starts-with) 0)
148
(case-insensitive-p starts-with))
151
(case-insensitive-p starts-with))))))
152
(declare (special end-string-offset end-anchored-p end-string))
153
;; now create the scanner and return it
154
(create-scanner-aux match-fn
155
(regex-min-length regex)
156
(or (start-anchored-p regex)
157
;; a dot in single-line-mode also
158
;; implicitely anchors the regex at
159
;; the start, i.e. if we can't match
160
;; from the first position we won't
162
(and (typep starts-with 'everything)
163
(single-line-p starts-with)))
166
;; only mark regex as end-anchored if we
167
;; found a non-zero-length string before
169
(and end-string-test end-anchored-p)
179
#+:use-acl-regexp2-engine
180
(declaim (inline create-scanner))
182
#+:use-acl-regexp2-engine
183
(defmethod create-scanner ((scanner regexp::regular-expression) &key case-insensitive-mode
188
(declare (ignore destructive))
189
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
190
(signal-ppcre-invocation-error
191
"You can't use the keyword arguments to modify an existing scanner."))
194
#+:use-acl-regexp2-engine
195
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
200
(declare (ignore destructive))
201
(excl:compile-re parse-tree
202
:case-fold case-insensitive-mode
203
:ignore-whitespace extended-mode
204
:multiple-lines multi-line-mode
205
:single-line single-line-mode
208
(defgeneric scan (regex target-string &key start end real-start-pos)
209
(:documentation "Searches TARGET-STRING from START to END and tries
210
to match REGEX. On success returns four values - the start of the
211
match, the end of the match, and two arrays denoting the beginnings
212
and ends of register matches. On failure returns NIL. REGEX can be a
213
string which will be parsed according to Perl syntax, a parse tree, or
214
a pre-compiled scanner created by CREATE-SCANNER. TARGET-STRING will
215
be coerced to a simple string if it isn't one already. The
216
REAL-START-POS parameter should be ignored - it exists only for
217
internal purposes."))
219
#-:use-acl-regexp2-engine
220
(defmethod scan ((regex-string string) target-string
222
(end (length target-string))
223
((:real-start-pos *real-start-pos*) nil))
224
(declare #.*standard-optimize-settings*)
225
;; note that the scanners are optimized for simple strings so we
226
;; have to coerce TARGET-STRING into one if it isn't already
227
(funcall (create-scanner regex-string)
228
(maybe-coerce-to-simple-string target-string)
231
#-:use-acl-regexp2-engine
232
(defmethod scan ((scanner function) target-string
234
(end (length target-string))
235
((:real-start-pos *real-start-pos*) nil))
236
(declare #.*standard-optimize-settings*)
238
(maybe-coerce-to-simple-string target-string)
241
#-:use-acl-regexp2-engine
242
(defmethod scan ((parse-tree t) target-string
244
(end (length target-string))
245
((:real-start-pos *real-start-pos*) nil))
246
(declare #.*standard-optimize-settings*)
247
(funcall (create-scanner parse-tree)
248
(maybe-coerce-to-simple-string target-string)
251
#+:use-acl-regexp2-engine
252
(declaim (inline scan))
254
#+:use-acl-regexp2-engine
255
(defmethod scan ((parse-tree t) target-string
257
(end (length target-string))
258
((:real-start-pos *real-start-pos*) nil))
260
(return-from scan nil))
261
(let ((results (multiple-value-list (excl:match-re parse-tree target-string
265
(declare (dynamic-extent results))
266
(cond ((null (first results)) nil)
267
(t (let* ((no-of-regs (- (length results) 2))
268
(reg-starts (make-array no-of-regs
269
:element-type '(or null fixnum)))
270
(reg-ends (make-array no-of-regs
271
:element-type '(or null fixnum)))
272
(match (second results)))
273
(loop for (start . end) in (cddr results)
275
do (setf (aref reg-starts i) start
276
(aref reg-ends i) end))
277
(values (car match) (cdr match) reg-starts reg-ends))))))
280
(define-compiler-macro scan (&whole form &environment env regex target-string &rest rest)
281
"Make sure that constant forms are compiled into scanners at compile time."
282
(cond ((constantp regex env)
283
`(scan (load-time-value
284
(create-scanner ,regex))
285
,target-string ,@rest))
288
(defun scan-to-strings (regex target-string &key (start 0)
289
(end (length target-string))
291
(declare #.*standard-optimize-settings*)
292
"Like SCAN but returns substrings of TARGET-STRING instead of
293
positions, i.e. this function returns two values on success: the whole
294
match as a string plus an array of substrings (or NILs) corresponding
295
to the matched registers. If SHAREDP is true, the substrings may share
296
structure with TARGET-STRING."
297
(multiple-value-bind (match-start match-end reg-starts reg-ends)
298
(scan regex target-string :start start :end end)
300
(return-from scan-to-strings nil))
301
(let ((substr-fn (if sharedp #'nsubseq #'subseq)))
302
(values (funcall substr-fn
303
target-string match-start match-end)
305
(lambda (reg-start reg-end)
308
target-string reg-start reg-end)
314
(define-compiler-macro scan-to-strings
315
(&whole form &environment env regex target-string &rest rest)
316
"Make sure that constant forms are compiled into scanners at compile time."
317
(cond ((constantp regex env)
318
`(scan-to-strings (load-time-value
319
(create-scanner ,regex))
320
,target-string ,@rest))
323
(defmacro register-groups-bind (var-list (regex target-string
324
&key start end sharedp)
326
"Executes BODY with the variables in VAR-LIST bound to the
327
corresponding register groups after TARGET-STRING has been matched
328
against REGEX, i.e. each variable is either bound to a string or to
329
NIL. If there is no match, BODY is _not_ executed. For each element of
330
VAR-LIST which is NIL there's no binding to the corresponding register
331
group. The number of variables in VAR-LIST must not be greater than
332
the number of register groups. If SHAREDP is true, the substrings may
333
share structure with TARGET-STRING."
334
(with-rebinding (target-string)
335
(with-unique-names (match-start match-end reg-starts reg-ends
336
start-index substr-fn)
337
`(multiple-value-bind (,match-start ,match-end ,reg-starts ,reg-ends)
338
(scan ,regex ,target-string :start (or ,start 0)
339
:end (or ,end (length ,target-string)))
340
(declare (ignore ,match-end))
343
`(,substr-fn (if ,sharedp
346
(loop for (function var) in (normalize-var-list var-list)
349
collect `(,var (let ((,start-index
350
(aref ,reg-starts ,counter)))
356
(aref ,reg-ends ,counter)))
360
(defmacro do-scans ((match-start match-end reg-starts reg-ends regex
362
&optional result-form
366
"Iterates over TARGET-STRING and tries to match REGEX as often as
367
possible evaluating BODY with MATCH-START, MATCH-END, REG-STARTS, and
368
REG-ENDS bound to the four return values of each match in turn. After
369
the last match, returns RESULT-FORM if provided or NIL otherwise. An
370
implicit block named NIL surrounds DO-SCANS; RETURN may be used to
371
terminate the loop immediately. If REGEX matches an empty string the
372
scan is continued one position behind this match. BODY may start with
374
(with-rebinding (target-string)
375
(with-unique-names (%start %end %regex scanner loop-tag block-name)
376
(declare (ignorable %regex scanner))
377
;; the NIL BLOCK to enable exits via (RETURN ...)
379
(let* ((,%start (or ,start 0))
380
(,%end (or ,end (length ,target-string)))
381
,@(unless (constantp regex env)
382
;; leave constant regular expressions as they are -
383
;; SCAN's compiler macro will take care of them;
384
;; otherwise create a scanner unless the regex is
385
;; already a function (otherwise SCAN will do this
386
;; on each iteration)
388
(,scanner (typecase ,%regex
390
(t (create-scanner ,%regex)))))))
391
;; coerce TARGET-STRING to a simple string unless it is one
392
;; already (otherwise SCAN will do this on each iteration)
394
(maybe-coerce-to-simple-string ,target-string))
395
;; a named BLOCK so we can exit the TAGBODY
399
;; invoke SCAN and bind the returned values to the
400
;; provided variables
402
(,match-start ,match-end ,reg-starts ,reg-ends)
403
(scan ,(cond ((constantp regex env) regex)
405
,target-string :start ,%start :end ,%end
406
:real-start-pos ,%start)
407
;; declare the variables to be IGNORABLE to prevent the
408
;; compiler from issuing warnings
410
(ignorable ,match-start ,match-end ,reg-starts ,reg-ends))
412
;; stop iteration on first failure
413
(return-from ,block-name ,result-form))
414
;; execute BODY (wrapped in LOCALLY so it can start with
418
;; advance by one position if we had a zero-length match
419
(setq ,%start (if (= ,match-start ,match-end)
422
(go ,loop-tag))))))))
424
(defmacro do-matches ((match-start match-end regex
426
&optional result-form
429
"Iterates over TARGET-STRING and tries to match REGEX as often as
430
possible evaluating BODY with MATCH-START and MATCH-END bound to the
431
start/end positions of each match in turn. After the last match,
432
returns RESULT-FORM if provided or NIL otherwise. An implicit block
433
named NIL surrounds DO-MATCHES; RETURN may be used to terminate the
434
loop immediately. If REGEX matches an empty string the scan is
435
continued one position behind this match. BODY may start with
437
;; this is a simplified form of DO-SCANS - we just provide two dummy
438
;; vars and ignore them
439
(with-unique-names (reg-starts reg-ends)
440
`(do-scans (,match-start ,match-end
441
,reg-starts ,reg-ends
442
,regex ,target-string
444
:start ,start :end ,end)
447
(defmacro do-matches-as-strings ((match-var regex
449
&optional result-form
450
&key start end sharedp)
452
"Iterates over TARGET-STRING and tries to match REGEX as often as
453
possible evaluating BODY with MATCH-VAR bound to the substring of
454
TARGET-STRING corresponding to each match in turn. After the last
455
match, returns RESULT-FORM if provided or NIL otherwise. An implicit
456
block named NIL surrounds DO-MATCHES-AS-STRINGS; RETURN may be used to
457
terminate the loop immediately. If REGEX matches an empty string the
458
scan is continued one position behind this match. If SHAREDP is true,
459
the substrings may share structure with TARGET-STRING. BODY may start
461
(with-rebinding (target-string)
462
(with-unique-names (match-start match-end substr-fn)
463
`(let ((,substr-fn (if ,sharedp #'nsubseq #'subseq)))
464
;; simple use DO-MATCHES to extract the substrings
465
(do-matches (,match-start ,match-end ,regex ,target-string
466
,result-form :start ,start :end ,end)
469
,target-string ,match-start ,match-end)))
472
(defmacro do-register-groups (var-list (regex target-string
473
&optional result-form
474
&key start end sharedp)
476
"Iterates over TARGET-STRING and tries to match REGEX as often as
477
possible evaluating BODY with the variables in VAR-LIST bound to the
478
corresponding register groups for each match in turn, i.e. each
479
variable is either bound to a string or to NIL. For each element of
480
VAR-LIST which is NIL there's no binding to the corresponding register
481
group. The number of variables in VAR-LIST must not be greater than
482
the number of register groups. After the last match, returns
483
RESULT-FORM if provided or NIL otherwise. An implicit block named NIL
484
surrounds DO-REGISTER-GROUPS; RETURN may be used to terminate the loop
485
immediately. If REGEX matches an empty string the scan is continued
486
one position behind this match. If SHAREDP is true, the substrings may
487
share structure with TARGET-STRING. BODY may start with declarations."
488
(with-rebinding (target-string)
489
(with-unique-names (substr-fn match-start match-end
490
reg-starts reg-ends start-index)
491
`(let ((,substr-fn (if ,sharedp
494
(do-scans (,match-start ,match-end ,reg-starts ,reg-ends
495
,regex ,target-string
496
,result-form :start ,start :end ,end)
497
(let ,(loop for (function var) in (normalize-var-list var-list)
500
collect `(,var (let ((,start-index
501
(aref ,reg-starts ,counter)))
507
(aref ,reg-ends ,counter)))
511
(defun all-matches (regex target-string
513
(end (length target-string)))
514
(declare #.*standard-optimize-settings*)
515
"Returns a list containing the start and end positions of all
516
matches of REGEX against TARGET-STRING, i.e. if there are N matches
517
the list contains (* 2 N) elements. If REGEX matches an empty string
518
the scan is continued one position behind this match."
520
(do-matches (match-start match-end
522
(nreverse result-list)
523
:start start :end end)
524
(push match-start result-list)
525
(push match-end result-list))))
528
(define-compiler-macro all-matches (&whole form &environment env regex &rest rest)
529
"Make sure that constant forms are compiled into scanners at
531
(cond ((constantp regex env)
532
`(all-matches (load-time-value
533
(create-scanner ,regex))
537
(defun all-matches-as-strings (regex target-string
539
(end (length target-string))
541
(declare #.*standard-optimize-settings*)
542
"Returns a list containing all substrings of TARGET-STRING which
543
match REGEX. If REGEX matches an empty string the scan is continued
544
one position behind this match. If SHAREDP is true, the substrings may
545
share structure with TARGET-STRING."
547
(do-matches-as-strings (match regex target-string (nreverse result-list)
548
:start start :end end :sharedp sharedp)
549
(push match result-list))))
552
(define-compiler-macro all-matches-as-strings (&whole form &environment env regex &rest rest)
553
"Make sure that constant forms are compiled into scanners at
555
(cond ((constantp regex env)
556
`(all-matches-as-strings
558
(create-scanner ,regex))
562
(defun split (regex target-string
564
(end (length target-string))
569
(declare #.*standard-optimize-settings*)
570
"Matches REGEX against TARGET-STRING as often as possible and
571
returns a list of the substrings between the matches. If
572
WITH-REGISTERS-P is true, substrings corresponding to matched
573
registers are inserted into the list as well. If OMIT-UNMATCHED-P is
574
true, unmatched registers will simply be left out, otherwise they will
575
show up as NIL. LIMIT limits the number of elements returned -
576
registers aren't counted. If LIMIT is NIL (or 0 which is equivalent),
577
trailing empty strings are removed from the result list. If REGEX
578
matches an empty string the scan is continued one position behind this
579
match. If SHAREDP is true, the substrings may share structure with
581
;; initialize list of positions POS-LIST to extract substrings with
582
;; START so that the start of the next match will mark the end of
583
;; the first substring
584
(let ((pos-list (list start))
586
;; how would Larry Wall do it?
589
(do-scans (match-start match-end
591
regex target-string nil
592
:start start :end end)
593
(unless (and (= match-start match-end)
594
(= match-start (car pos-list)))
595
;; push start of match on list unless this would be an empty
596
;; string adjacent to the last element pushed onto the list
598
(>= (incf counter) limit))
600
(push match-start pos-list)
601
(when with-registers-p
602
;; optionally insert matched registers
603
(loop for reg-start across reg-starts
604
for reg-end across reg-ends
606
;; but only if they've matched
607
do (push reg-start pos-list)
608
(push reg-end pos-list)
609
else unless omit-unmatched-p
610
;; or if we're allowed to insert NIL instead
611
do (push nil pos-list)
612
(push nil pos-list)))
614
(push match-end pos-list)))
615
;; end of whole string
617
;; now collect substrings
619
(loop with substr-fn = (if sharedp #'nsubseq #'subseq)
620
with string-seen = nil
621
for (this-end this-start) on pos-list by #'cddr
622
;; skip empty strings from end of list
627
(> this-end this-start)))))
628
collect (if this-start
630
target-string this-start this-end)
634
(define-compiler-macro split (&whole form &environment env regex target-string &rest rest)
635
"Make sure that constant forms are compiled into scanners at compile time."
636
(cond ((constantp regex env)
637
`(split (load-time-value
638
(create-scanner ,regex))
639
,target-string ,@rest))
642
(defun string-case-modifier (str from to start end)
643
(declare #.*standard-optimize-settings*)
644
(declare (type fixnum from to start end))
645
"Checks whether all words in STR between FROM and TO are upcased,
646
downcased or capitalized and returns a function which applies a
647
corresponding case modification to strings. Returns #'IDENTITY
648
otherwise, especially if words in the target area extend beyond FROM
649
or TO. STR is supposed to be bounded by START and END. It is assumed
650
that (<= START FROM TO END)."
654
(alphanumericp (char str (1- from)))
655
(alphanumericp (char str from)))
657
(alphanumericp (char str to))
658
(alphanumericp (char str (1- to)))))
659
;; if it's a zero-length string or if words extend beyond FROM
660
;; or TO we return NIL, i.e. #'IDENTITY
662
;; otherwise we loop through STR from FROM to TO
663
(loop with last-char-both-case
665
for index of-type fixnum from from below to
666
for chr = (char str index)
667
do (cond ((not #-:cormanlisp (both-case-p chr)
668
#+:cormanlisp (or (upper-case-p chr)
670
;; this character doesn't have a case so we
671
;; consider it as a word boundary (note that
672
;; this differs from how \b works in Perl)
673
(setq last-char-both-case nil))
675
;; an uppercase character
677
(if last-char-both-case
678
;; not the first character in a
680
((:undecided) :upcase)
681
((:downcase :capitalize) (return nil))
682
((:upcase) current-result))
685
((:downcase) (return nil))
686
((:capitalize :upcase) current-result)))
687
last-char-both-case t))
689
;; a lowercase character
693
((:undecided) :capitalize)
694
((:downcase) current-result)
695
((:capitalize) (if last-char-both-case
698
((:upcase) (return nil)))
699
last-char-both-case t)))
700
finally (return current-result)))
702
((:undecided :upcase) #'string-upcase)
703
((:downcase) #'string-downcase)
704
((:capitalize) #'string-capitalize)))
706
;; first create a scanner to identify the special parts of the
707
;; replacement string (eat your own dog food...)
709
(defgeneric build-replacement-template (replacement-string)
710
(declare #.*standard-optimize-settings*)
711
(:documentation "Converts a replacement string for REGEX-REPLACE or
712
REGEX-REPLACE-ALL into a replacement template which is an
716
(let* ((*use-bmh-matchers* nil)
717
(reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
718
(defmethod build-replacement-template ((replacement-string string))
719
(declare #.*standard-optimize-settings*)
721
;; COLLECTOR will hold the (reversed) template
723
;; scan through all special parts of the replacement string
724
(do-matches (match-start match-end reg-scanner replacement-string)
725
(when (< from match-start)
726
;; strings between matches are copied verbatim
727
(push (subseq replacement-string from match-start) collector))
728
;; PARSE-START is true if the pattern matched a number which
729
;; refers to a register
730
(let* ((parse-start (position-if #'digit-char-p
734
(token (if parse-start
735
(1- (parse-integer replacement-string
738
;; if we didn't match a number we convert the
739
;; character to a symbol
740
(case (char replacement-string (1+ match-start))
742
((#\`) :before-match)
744
((#\\) :backslash)))))
745
(when (and (numberp token) (< token 0))
746
;; make sure we don't accept something like "\\0"
747
(signal-ppcre-invocation-error
748
"Illegal substring ~S in replacement string"
749
(subseq replacement-string match-start match-end)))
750
(push token collector))
751
;; remember where the match ended
752
(setq from match-end))
753
(when (< from (length replacement-string))
754
;; push the rest of the replacement string onto the list
755
(push (subseq replacement-string from) collector))
756
(nreverse collector))))
759
(defmethod build-replacement-template ((replacement-function function))
760
(list replacement-function))
763
(defmethod build-replacement-template ((replacement-function-symbol symbol))
764
(list replacement-function-symbol))
767
(defmethod build-replacement-template ((replacement-list list))
770
;;; Corman Lisp's methods can't be closures... :(
772
(let* ((*use-bmh-matchers* nil)
773
(reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
774
(defun build-replacement-template (replacement)
775
(declare #.*standard-optimize-settings*)
776
(typecase replacement
779
;; COLLECTOR will hold the (reversed) template
781
;; scan through all special parts of the replacement string
782
(do-matches (match-start match-end reg-scanner replacement)
783
(when (< from match-start)
784
;; strings between matches are copied verbatim
785
(push (subseq replacement from match-start) collector))
786
;; PARSE-START is true if the pattern matched a number which
787
;; refers to a register
788
(let* ((parse-start (position-if #'digit-char-p
792
(token (if parse-start
793
(1- (parse-integer replacement
796
;; if we didn't match a number we convert the
797
;; character to a symbol
798
(case (char replacement (1+ match-start))
800
((#\`) :before-match)
802
((#\\) :backslash)))))
803
(when (and (numberp token) (< token 0))
804
;; make sure we don't accept something like "\\0"
805
(signal-ppcre-invocation-error
806
"Illegal substring ~S in replacement string"
807
(subseq replacement match-start match-end)))
808
(push token collector))
809
;; remember where the match ended
810
(setq from match-end))
811
(when (< from (length replacement))
812
;; push the rest of the replacement string onto the list
813
(push (nsubseq replacement from) collector))
814
(nreverse collector)))
818
(list replacement)))))
820
(defun build-replacement (replacement-template
823
match-start match-end
826
(declare #.*standard-optimize-settings*)
827
"Accepts a replacement template and the current values from the
828
matching process in REGEX-REPLACE or REGEX-REPLACE-ALL and returns the
829
corresponding template."
830
;; the upper exclusive bound of the register numbers in the regular
832
(let ((reg-bound (if reg-starts
833
(array-dimension reg-starts 0)
835
(with-output-to-string (s)
836
(loop for token in replacement-template
839
;; transfer string parts verbatim
840
(write-string token s))
842
;; replace numbers with the corresponding registers
843
(when (>= token reg-bound)
844
;; but only if the register was referenced in the
845
;; regular expression
846
(signal-ppcre-invocation-error
847
"Reference to non-existent register ~A in replacement string"
849
(when (svref reg-starts token)
850
;; and only if it matched, i.e. no match results
851
;; in an empty string
852
(write-string target-string s
853
:start (svref reg-starts token)
854
:end (svref reg-ends token))))
859
(nsubseq target-string match-start match-end)
861
(lambda (reg-start reg-end)
863
(nsubseq target-string reg-start reg-end)))
864
reg-starts reg-ends)))
869
match-start match-end
870
reg-starts reg-ends)))
879
(write-string target-string s
883
;; the part of the target string before the match
884
(write-string target-string s
888
;; the part of the target string after the match
889
(write-string target-string s
896
(nsubseq target-string match-start match-end)
898
(lambda (reg-start reg-end)
900
(nsubseq target-string reg-start reg-end)))
901
reg-starts reg-ends)))
906
match-start match-end
907
reg-starts reg-ends)))
910
(defun replace-aux (target-string replacement pos-list reg-list
911
start end preserve-case simple-calls)
912
(declare #.*standard-optimize-settings*)
913
"Auxiliary function used by REGEX-REPLACE and
914
REGEX-REPLACE-ALL. POS-LIST contains a list with the start and end
915
positions of all matches while REG-LIST contains a list of arrays
916
representing the corresponding register start and end positions."
917
;; build the template once before we start the loop
918
(let ((replacement-template (build-replacement-template replacement)))
919
(with-output-to-string (s)
920
;; loop through all matches and take the start and end of the
921
;; whole string into account
922
(loop for (from to) on (append (list start) pos-list (list end))
923
;; alternate between replacement and no replacement
924
for replace = nil then (and (not replace) to)
925
for reg-starts = (if replace (pop reg-list) nil)
926
for reg-ends = (if replace (pop reg-list) nil)
927
for curr-replacement = (if replace
928
;; build the replacement string
929
(build-replacement replacement-template
938
do (write-string (if preserve-case
939
;; modify the case of the replacement
940
;; string if necessary
941
(funcall (string-case-modifier target-string
949
do (write-string target-string s :start from :end to)))))
951
(defun regex-replace (regex target-string replacement
953
(end (length target-string))
956
(declare #.*standard-optimize-settings*)
957
"Try to match TARGET-STRING between START and END against REGEX and
958
replace the first match with REPLACEMENT.
960
REPLACEMENT can be a string which may contain the special substrings
961
\"\\&\" for the whole match, \"\\`\" for the part of TARGET-STRING
962
before the match, \"\\'\" for the part of TARGET-STRING after the
963
match, \"\\N\" or \"\\{N}\" for the Nth register where N is a positive
966
REPLACEMENT can also be a function designator in which case the
967
match will be replaced with the result of calling the function
968
designated by REPLACEMENT with the arguments TARGET-STRING, START,
969
END, MATCH-START, MATCH-END, REG-STARTS, and REG-ENDS. (REG-STARTS and
970
REG-ENDS are arrays holding the start and end positions of matched
971
registers or NIL - the meaning of the other arguments should be
974
Finally, REPLACEMENT can be a list where each element is a string,
975
one of the symbols :MATCH, :BEFORE-MATCH, or :AFTER-MATCH -
976
corresponding to \"\\&\", \"\\`\", and \"\\'\" above -, an integer N -
977
representing register (1+ N) -, or a function designator.
979
If PRESERVE-CASE is true, the replacement will try to preserve the
980
case (all upper case, all lower case, or capitalized) of the
981
match. The result will always be a fresh string, even if REGEX doesn't
983
(multiple-value-bind (match-start match-end reg-starts reg-ends)
984
(scan regex target-string :start start :end end)
986
(replace-aux target-string replacement
987
(list match-start match-end)
988
(list reg-starts reg-ends)
989
start end preserve-case simple-calls)
990
(subseq target-string start end))))
993
(define-compiler-macro regex-replace
994
(&whole form &environment env regex target-string replacement &rest rest)
995
"Make sure that constant forms are compiled into scanners at compile time."
996
(cond ((constantp regex env)
997
`(regex-replace (load-time-value
998
(create-scanner ,regex))
999
,target-string ,replacement ,@rest))
1002
(defun regex-replace-all (regex target-string replacement
1004
(end (length target-string))
1007
(declare #.*standard-optimize-settings*)
1008
"Try to match TARGET-STRING between START and END against REGEX and
1009
replace all matches with REPLACEMENT.
1011
REPLACEMENT can be a string which may contain the special substrings
1012
\"\\&\" for the whole match, \"\\`\" for the part of TARGET-STRING
1013
before the match, \"\\'\" for the part of TARGET-STRING after the
1014
match, \"\\N\" or \"\\{N}\" for the Nth register where N is a positive
1017
REPLACEMENT can also be a function designator in which case the
1018
match will be replaced with the result of calling the function
1019
designated by REPLACEMENT with the arguments TARGET-STRING, START,
1020
END, MATCH-START, MATCH-END, REG-STARTS, and REG-ENDS. (REG-STARTS and
1021
REG-ENDS are arrays holding the start and end positions of matched
1022
registers or NIL - the meaning of the other arguments should be
1025
Finally, REPLACEMENT can be a list where each element is a string,
1026
one of the symbols :MATCH, :BEFORE-MATCH, or :AFTER-MATCH -
1027
corresponding to \"\\&\", \"\\`\", and \"\\'\" above -, an integer N -
1028
representing register (1+ N) -, or a function designator.
1030
If PRESERVE-CASE is true, the replacement will try to preserve the
1031
case (all upper case, all lower case, or capitalized) of the
1032
match. The result will always be a fresh string, even if REGEX doesn't
1034
(let ((pos-list '())
1036
(do-scans (match-start match-end reg-starts reg-ends regex target-string
1038
:start start :end end)
1039
(push match-start pos-list)
1040
(push match-end pos-list)
1041
(push reg-starts reg-list)
1042
(push reg-ends reg-list))
1044
(replace-aux target-string replacement
1047
start end preserve-case simple-calls)
1048
(subseq target-string start end))))
1051
(define-compiler-macro regex-replace-all
1052
(&whole form &environment env regex target-string replacement &rest rest)
1053
"Make sure that constant forms are compiled into scanners at compile time."
1054
(cond ((constantp regex env)
1055
`(regex-replace-all (load-time-value
1056
(create-scanner ,regex))
1057
,target-string ,replacement ,@rest))
1061
(defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
1063
"Auxiliary macro used by REGEX-APROPOS and REGEX-APROPOS-LIST. Loops
1064
through PACKAGES and executes BODY with SYMBOL bound to each symbol
1065
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
1066
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
1067
scanner, a case-insensitive scanner is used."
1068
(with-rebinding (regex)
1069
(with-unique-names (scanner %packages next morep)
1070
`(let* ((,scanner (create-scanner ,regex
1071
:case-insensitive-mode
1072
(and ,case-insensitive
1073
(not (functionp ,regex)))))
1074
(,%packages (or ,packages
1075
(list-all-packages))))
1076
(with-package-iterator (,next ,%packages :external :internal :inherited)
1078
(multiple-value-bind (,morep symbol)
1081
(return ,return-form))
1082
(when (scan ,scanner (symbol-name symbol))
1085
;;; The following two functions were provided by Karsten Poeck
1088
(defmacro do-with-all-symbols ((variable package-packagelist) &body body)
1089
(with-unique-names (pack-var iter-sym)
1090
`(if (listp ,package-packagelist)
1091
(dolist (,pack-var ,package-packagelist)
1092
(do-symbols (,iter-sym ,pack-var)
1094
(do-symbols (,iter-sym ,package-packagelist)
1098
(defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
1100
"Auxiliary macro used by REGEX-APROPOS and REGEX-APROPOS-LIST. Loops
1101
through PACKAGES and executes BODY with SYMBOL bound to each symbol
1102
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
1103
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
1104
scanner, a case-insensitive scanner is used."
1105
(with-rebinding (regex)
1106
(with-unique-names (scanner %packages)
1107
`(let* ((,scanner (create-scanner ,regex
1108
:case-insensitive-mode
1109
(and ,case-insensitive
1110
(not (functionp ,regex)))))
1111
(,%packages (or ,packages
1112
(list-all-packages))))
1113
(do-with-all-symbols (symbol ,%packages)
1114
(when (scan ,scanner (symbol-name symbol))
1118
(defun regex-apropos-list (regex &optional packages &key (case-insensitive t))
1119
(declare #.*standard-optimize-settings*)
1120
"Similar to the standard function APROPOS-LIST but returns a list of
1121
all symbols which match the regular expression REGEX. If
1122
CASE-INSENSITIVE is true and REGEX isn't already a scanner, a
1123
case-insensitive scanner is used."
1124
(let ((collector '()))
1125
(regex-apropos-aux (regex packages case-insensitive collector)
1126
(push symbol collector))))
1128
(defun print-symbol-info (symbol)
1129
"Auxiliary function used by REGEX-APROPOS. Tries to print some
1130
meaningful information about a symbol."
1131
(declare #.*standard-optimize-settings*)
1133
(let ((output-list '()))
1134
(cond ((special-operator-p symbol)
1135
(push "[special operator]" output-list))
1136
((macro-function symbol)
1137
(push "[macro]" output-list))
1139
(let* ((function (symbol-function symbol))
1140
(compiledp (compiled-function-p function)))
1141
(multiple-value-bind (lambda-expr closurep)
1142
(function-lambda-expression function)
1145
"[~:[~;compiled ~]~:[function~;closure~]]~:[~; ~A~]"
1146
compiledp closurep lambda-expr (cadr lambda-expr))
1148
(let ((class (find-class symbol nil)))
1150
(push (format nil "[class] ~S" class) output-list)))
1151
(cond ((keywordp symbol)
1152
(push "[keyword]" output-list))
1154
(push (format nil "[constant]~:[~; value: ~S~]"
1155
(boundp symbol) (symbol-value symbol)) output-list))
1157
(push #+(or LispWorks CLISP) "[variable]"
1158
#-(or LispWorks CLISP) (format nil "[variable] value: ~S"
1159
(symbol-value symbol))
1161
#-(or :cormanlisp :clisp)
1162
(format t "~&~S ~<~;~^~A~@{~:@_~A~}~;~:>" symbol output-list)
1163
#+(or :cormanlisp :clisp)
1164
(loop for line in output-list
1165
do (format t "~&~S ~A" symbol line)))
1167
;; this seems to be necessary due to some errors I encountered
1169
(format t "~&~S [an error occured while trying to print more info]" symbol))))
1171
(defun regex-apropos (regex &optional packages &key (case-insensitive t))
1172
"Similar to the standard function APROPOS but returns a list of all
1173
symbols which match the regular expression REGEX. If CASE-INSENSITIVE
1174
is true and REGEX isn't already a scanner, a case-insensitive scanner
1176
(declare #.*standard-optimize-settings*)
1177
(regex-apropos-aux (regex packages case-insensitive)
1178
(print-symbol-info symbol))
1181
(let* ((*use-bmh-matchers* nil)
1182
(non-word-char-scanner (create-scanner "[^a-zA-Z_0-9]")))
1183
(defun quote-meta-chars (string &key (start 0) (end (length string)))
1184
"Quote, i.e. prefix with #\\\\, all non-word characters in STRING."
1185
(regex-replace-all non-word-char-scanner string "\\\\\\&"
1186
:start start :end end)))
1188
(let* ((*use-bmh-matchers* nil)
1189
(*allow-quoting* nil)
1190
(quote-char-scanner (create-scanner "\\\\Q"))
1191
(section-scanner (create-scanner "\\\\Q((?:[^\\\\]|\\\\(?!Q))*?)(?:\\\\E|$)")))
1192
(defun quote-sections (string)
1193
"Replace sections inside of STRING which are enclosed by \\Q and
1194
\\E with the quoted equivalent of these sections \(see
1195
QUOTE-META-CHARS). Repeat this as long as there are such
1196
sections. These sections may nest."
1197
(flet ((quote-substring (target-string start end match-start
1198
match-end reg-starts reg-ends)
1199
(declare (ignore start end match-start match-end))
1200
(quote-meta-chars target-string
1201
:start (svref reg-starts 0)
1202
:end (svref reg-ends 0))))
1203
(loop for result = string then (regex-replace-all section-scanner
1206
while (scan quote-char-scanner result)
1207
finally (return result)))))
1209
(let* ((*use-bmh-matchers* nil)
1210
(comment-scanner (create-scanner "(?s)\\(\\?#.*?\\)"))
1211
(extended-comment-scanner (create-scanner "(?m:#.*?$)|(?s:\\(\\?#.*?\\))"))
1212
(quote-token-scanner "\\\\[QE]")
1213
(quote-token-replace-scanner "\\\\([QE])"))
1214
(defun clean-comments (string &optional extended-mode)
1215
"Clean \(?#...) comments within STRING for quoting, i.e. convert
1216
\\Q to Q and \\E to E. If EXTENDED-MODE is true, also clean
1217
end-of-line comments, i.e. those starting with #\\# and ending with
1219
(flet ((remove-tokens (target-string start end match-start
1220
match-end reg-starts reg-ends)
1221
(declare (ignore start end reg-starts reg-ends))
1222
(loop for result = (nsubseq target-string match-start match-end)
1223
then (regex-replace-all quote-token-replace-scanner result "\\1")
1224
;; we must probably repeat this because the comment
1225
;; can contain substrings like \\Q
1226
while (scan quote-token-scanner result)
1227
finally (return result))))
1228
(regex-replace-all (if extended-mode
1229
extended-comment-scanner
1234
(defun parse-tree-synonym (symbol)
1235
"Returns the parse tree the SYMBOL symbol is a synonym for. Returns
1236
NIL is SYMBOL wasn't yet defined to be a synonym."
1237
(get symbol 'parse-tree-synonym))
1239
(defun (setf parse-tree-synonym) (new-parse-tree symbol)
1240
"Defines SYMBOL to be a synonm for the parse tree NEW-PARSE-TREE."
1241
(setf (get symbol 'parse-tree-synonym) new-parse-tree))
1243
(defmacro define-parse-tree-synonym (name parse-tree)
1244
"Defines the symbol NAME to be a synonym for the parse tree
1245
PARSE-TREE. Both arguments are quoted."
1246
`(eval-when (:compile-toplevel :load-toplevel :execute)
1247
(setf (parse-tree-synonym ',name) ',parse-tree)))