Coverage report: /home/jsnell/.sbcl/site/cl-ppcre-1.2.13/repetition-closures.lisp
Kind | Covered | All | % |
expression | 621 | 927 | 67.0 |
branch | 122 | 174 | 70.1 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
2
;;; $Header: /usr/local/cvsrep/cl-ppcre/repetition-closures.lisp,v 1.24 2005/04/13 15:35:58 edi Exp $
4
;;; This is actually a part of closures.lisp which we put into a
5
;;; separate file because it is rather complex. We only deal with
6
;;; REPETITIONs here. Note that this part of the code contains some
7
;;; rather crazy micro-optimizations which were introduced to be as
8
;;; competitive with Perl as possible in tight loops.
10
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
12
;;; Redistribution and use in source and binary forms, with or without
13
;;; modification, are permitted provided that the following conditions
16
;;; * Redistributions of source code must retain the above copyright
17
;;; notice, this list of conditions and the following disclaimer.
19
;;; * Redistributions in binary form must reproduce the above
20
;;; copyright notice, this list of conditions and the following
21
;;; disclaimer in the documentation and/or other materials
22
;;; provided with the distribution.
24
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
25
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
28
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
30
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38
(defmacro incf-after (place &optional (delta 1) &environment env)
39
"Utility macro inspired by C's \"place++\", i.e. first return the
40
value of PLACE and afterwards increment it by DELTA."
41
(with-unique-names (%temp)
42
(multiple-value-bind (vars vals store-vars writer-form reader-form)
43
(get-setf-expansion place env)
44
`(let* (,@(mapcar #'list vars vals)
46
(,(car store-vars) (+ ,%temp ,delta)))
50
;; code for greedy repetitions with minimum zero
52
(defmacro greedy-constant-length-closure (check-curr-pos)
53
"This is the template for simple greedy repetitions (where simple
54
means that the minimum number of repetitions is zero, that the inner
55
regex to be checked is of fixed length LEN, and that it doesn't
56
contain registers, i.e. there's no need for backtracking).
57
CHECK-CURR-POS is a form which checks whether the inner regex of the
58
repetition matches at CURR-POS."
61
(declare (type fixnum start-pos maximum))
62
;; because we know LEN we know in advance where to stop at the
63
;; latest; we also take into consideration MIN-REST, i.e. the
64
;; minimal length of the part behind the repetition
65
(let ((target-end-pos (min (1+ (- *end-pos* len min-rest))
66
;; don't go further than MAXIMUM
67
;; repetitions, of course
69
(the fixnum (* len maximum)))))
71
(declare (type fixnum target-end-pos curr-pos))
72
(block greedy-constant-length-matcher
73
;; we use an ugly TAGBODY construct because this might be a
74
;; tight loop and this version is a bit faster than our LOOP
75
;; version (at least in CMUCL)
78
;; first go forward as far as possible, i.e. while
79
;; the inner regex matches
80
(when (>= curr-pos target-end-pos)
86
;; now go back LEN steps each until we're able to match
87
;; the rest of the regex
88
(when (< curr-pos start-pos)
89
(return-from greedy-constant-length-matcher nil))
90
(let ((result (funcall next-fn curr-pos)))
92
(return-from greedy-constant-length-matcher result)))
94
(go backward-loop)))))
95
;; basically the same code; it's just a bit easier because we're
96
;; not bounded by MAXIMUM
98
(declare (type fixnum start-pos))
99
(let ((target-end-pos (1+ (- *end-pos* len min-rest)))
100
(curr-pos start-pos))
101
(declare (type fixnum target-end-pos curr-pos))
102
(block greedy-constant-length-matcher
105
(when (>= curr-pos target-end-pos)
107
(when ,check-curr-pos
111
(when (< curr-pos start-pos)
112
(return-from greedy-constant-length-matcher nil))
113
(let ((result (funcall next-fn curr-pos)))
115
(return-from greedy-constant-length-matcher result)))
117
(go backward-loop)))))))
119
(defun create-greedy-everything-matcher (maximum min-rest next-fn)
120
(declare #.*standard-optimize-settings*)
121
(declare (type fixnum min-rest)
122
(type function next-fn))
123
"Creates a closure which just matches as far ahead as possible,
124
i.e. a closure for a dot in single-line mode."
127
(declare (type fixnum start-pos maximum))
128
;; because we know LEN we know in advance where to stop at the
129
;; latest; we also take into consideration MIN-REST, i.e. the
130
;; minimal length of the part behind the repetition
131
(let ((target-end-pos (min (+ start-pos maximum)
132
(- *end-pos* min-rest))))
133
(declare (type fixnum target-end-pos))
134
;; start from the highest possible position and go backward
135
;; until we're able to match the rest of the regex
136
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
137
thereis (funcall next-fn curr-pos))))
138
;; basically the same code; it's just a bit easier because we're
139
;; not bounded by MAXIMUM
141
(declare (type fixnum start-pos))
142
(let ((target-end-pos (- *end-pos* min-rest)))
143
(declare (type fixnum target-end-pos))
144
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
145
thereis (funcall next-fn curr-pos))))))
147
(defgeneric create-greedy-constant-length-matcher (repetition next-fn)
148
(declare #.*standard-optimize-settings*)
149
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
150
that REPETITION is greedy and the minimal number of repetitions is
151
zero. It is furthermore assumed that the inner regex of REPETITION is
152
of fixed length and doesn't contain registers."))
154
(defmethod create-greedy-constant-length-matcher ((repetition repetition)
156
(declare #.*standard-optimize-settings*)
157
(let ((len (len repetition))
158
(maximum (maximum repetition))
159
(regex (regex repetition))
160
(min-rest (min-rest repetition)))
161
(declare (type fixnum len min-rest)
162
(type function next-fn))
164
;; inner regex has zero-length, so we can discard it
168
;; now first try to optimize for a couple of common cases
171
(let ((str (str regex)))
173
;; a single character
174
(let ((chr (schar str 0)))
175
(if (case-insensitive-p regex)
176
(greedy-constant-length-closure
177
(char-equal chr (schar *string* curr-pos)))
178
(greedy-constant-length-closure
179
(char= chr (schar *string* curr-pos)))))
181
(if (case-insensitive-p regex)
182
(greedy-constant-length-closure
183
(*string*-equal str curr-pos (+ curr-pos len) 0 len))
184
(greedy-constant-length-closure
185
(*string*= str curr-pos (+ curr-pos len) 0 len))))))
188
(insert-char-class-tester (regex (schar *string* curr-pos))
189
(if (invertedp regex)
190
(greedy-constant-length-closure
191
(not (char-class-test)))
192
(greedy-constant-length-closure
193
(char-class-test)))))
195
;; an EVERYTHING object, i.e. a dot
196
(if (single-line-p regex)
197
(create-greedy-everything-matcher maximum min-rest next-fn)
198
(greedy-constant-length-closure
199
(char/= #\Newline (schar *string* curr-pos)))))
201
;; the general case - we build an inner matcher which
202
;; just checks for immediate success, i.e. NEXT-FN is
204
(let ((inner-matcher (create-matcher-aux regex #'identity)))
205
(declare (type function inner-matcher))
206
(greedy-constant-length-closure
207
(funcall inner-matcher curr-pos)))))))))
209
(defgeneric create-greedy-no-zero-matcher (repetition next-fn)
210
(declare #.*standard-optimize-settings*)
211
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
212
that REPETITION is greedy and the minimal number of repetitions is
213
zero. It is furthermore assumed that the inner regex of REPETITION can
214
never match a zero-length string (or instead the maximal number of
215
repetitions is 1)."))
217
(defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
218
(declare #.*standard-optimize-settings*)
219
(let ((maximum (maximum repetition))
220
;; REPEAT-MATCHER is part of the closure's environment but it
221
;; can only be defined after GREEDY-AUX is defined
223
(declare (type function next-fn))
226
;; this is essentially like the next case but with a known
227
;; MAXIMUM of 1 we can get away without a counter; note that
228
;; we always arrive here if CONVERT optimizes <regex>* to
229
;; (?:<regex'>*<regex>)?
231
(create-matcher-aux (regex repetition) next-fn))
233
(declare (type function repeat-matcher))
234
(or (funcall repeat-matcher start-pos)
235
(funcall next-fn start-pos))))
237
;; we make a reservation for our slot in *REPEAT-COUNTERS*
238
;; because we need to keep track whether we've reached MAXIMUM
240
(let ((rep-num (incf-after *rep-num*)))
241
(flet ((greedy-aux (start-pos)
242
(declare (type fixnum start-pos maximum rep-num)
243
(type function repeat-matcher))
244
;; the actual matcher which first tries to match the
245
;; inner regex of REPETITION (if we haven't done so
246
;; too often) and on failure calls NEXT-FN
247
(or (and (< (aref *repeat-counters* rep-num) maximum)
248
(incf (aref *repeat-counters* rep-num))
249
;; note that REPEAT-MATCHER will call
250
;; GREEDY-AUX again recursively
252
(funcall repeat-matcher start-pos)
253
(decf (aref *repeat-counters* rep-num))))
254
(funcall next-fn start-pos))))
255
;; create a closure to match the inner regex and to
256
;; implement backtracking via GREEDY-AUX
258
(create-matcher-aux (regex repetition) #'greedy-aux))
259
;; the closure we return is just a thin wrapper around
260
;; GREEDY-AUX to initialize the repetition counter
262
(declare (type fixnum start-pos))
263
(setf (aref *repeat-counters* rep-num) 0)
264
(greedy-aux start-pos)))))
266
;; easier code because we're not bounded by MAXIMUM, but
267
;; basically the same
268
(flet ((greedy-aux (start-pos)
269
(declare (type fixnum start-pos)
270
(type function repeat-matcher))
271
(or (funcall repeat-matcher start-pos)
272
(funcall next-fn start-pos))))
274
(create-matcher-aux (regex repetition) #'greedy-aux))
277
(defgeneric create-greedy-matcher (repetition next-fn)
278
(declare #.*standard-optimize-settings*)
279
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
280
that REPETITION is greedy and the minimal number of repetitions is
283
(defmethod create-greedy-matcher ((repetition repetition) next-fn)
284
(declare #.*standard-optimize-settings*)
285
(let ((maximum (maximum repetition))
286
;; we make a reservation for our slot in *LAST-POS-STORES* because
287
;; we have to watch out for endless loops as the inner regex might
288
;; match zero-length strings
289
(zero-length-num (incf-after *zero-length-num*))
290
;; REPEAT-MATCHER is part of the closure's environment but it
291
;; can only be defined after GREEDY-AUX is defined
293
(declare (type fixnum zero-length-num)
294
(type function next-fn))
297
;; we make a reservation for our slot in *REPEAT-COUNTERS*
298
;; because we need to keep track whether we've reached MAXIMUM
300
(let ((rep-num (incf-after *rep-num*)))
301
(flet ((greedy-aux (start-pos)
302
;; the actual matcher which first tries to match the
303
;; inner regex of REPETITION (if we haven't done so
304
;; too often) and on failure calls NEXT-FN
305
(declare (type fixnum start-pos maximum rep-num)
306
(type function repeat-matcher))
308
(svref *last-pos-stores* zero-length-num)))
309
(when (and old-last-pos
310
(= (the fixnum old-last-pos) start-pos))
311
;; stop immediately if we've been here before,
312
;; i.e. if the last attempt matched a zero-length
314
(return-from greedy-aux (funcall next-fn start-pos)))
315
;; otherwise remember this position for the next
317
(setf (svref *last-pos-stores* zero-length-num) start-pos)
318
(or (and (< (aref *repeat-counters* rep-num) maximum)
319
(incf (aref *repeat-counters* rep-num))
320
;; note that REPEAT-MATCHER will call
321
;; GREEDY-AUX again recursively
323
(funcall repeat-matcher start-pos)
324
(decf (aref *repeat-counters* rep-num))
325
(setf (svref *last-pos-stores* zero-length-num)
327
(funcall next-fn start-pos)))))
328
;; create a closure to match the inner regex and to
329
;; implement backtracking via GREEDY-AUX
331
(create-matcher-aux (regex repetition) #'greedy-aux))
332
;; the closure we return is just a thin wrapper around
333
;; GREEDY-AUX to initialize the repetition counter and our
334
;; slot in *LAST-POS-STORES*
336
(declare (type fixnum start-pos))
337
(setf (aref *repeat-counters* rep-num) 0
338
(svref *last-pos-stores* zero-length-num) nil)
339
(greedy-aux start-pos)))))
341
;; easier code because we're not bounded by MAXIMUM, but
342
;; basically the same
343
(flet ((greedy-aux (start-pos)
344
(declare (type fixnum start-pos)
345
(type function repeat-matcher))
347
(svref *last-pos-stores* zero-length-num)))
348
(when (and old-last-pos
349
(= (the fixnum old-last-pos) start-pos))
350
(return-from greedy-aux (funcall next-fn start-pos)))
351
(setf (svref *last-pos-stores* zero-length-num) start-pos)
353
(funcall repeat-matcher start-pos)
354
(setf (svref *last-pos-stores* zero-length-num) old-last-pos))
355
(funcall next-fn start-pos)))))
357
(create-matcher-aux (regex repetition) #'greedy-aux))
359
(declare (type fixnum start-pos))
360
(setf (svref *last-pos-stores* zero-length-num) nil)
361
(greedy-aux start-pos)))))))
363
;; code for non-greedy repetitions with minimum zero
365
(defmacro non-greedy-constant-length-closure (check-curr-pos)
366
"This is the template for simple non-greedy repetitions (where
367
simple means that the minimum number of repetitions is zero, that the
368
inner regex to be checked is of fixed length LEN, and that it doesn't
369
contain registers, i.e. there's no need for backtracking).
370
CHECK-CURR-POS is a form which checks whether the inner regex of the
371
repetition matches at CURR-POS."
374
(declare (type fixnum start-pos maximum))
375
;; because we know LEN we know in advance where to stop at the
376
;; latest; we also take into consideration MIN-REST, i.e. the
377
;; minimal length of the part behind the repetition
378
(let ((target-end-pos (min (1+ (- *end-pos* len min-rest))
380
(the fixnum (* len maximum))))))
381
;; move forward by LEN and always try NEXT-FN first, then
383
(loop for curr-pos of-type fixnum from start-pos
386
thereis (funcall next-fn curr-pos)
387
while ,check-curr-pos
388
finally (return (funcall next-fn curr-pos)))))
389
;; basically the same code; it's just a bit easier because we're
390
;; not bounded by MAXIMUM
392
(declare (type fixnum start-pos))
393
(let ((target-end-pos (1+ (- *end-pos* len min-rest))))
394
(loop for curr-pos of-type fixnum from start-pos
397
thereis (funcall next-fn curr-pos)
398
while ,check-curr-pos
399
finally (return (funcall next-fn curr-pos)))))))
401
(defgeneric create-non-greedy-constant-length-matcher (repetition next-fn)
402
(declare #.*standard-optimize-settings*)
403
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
404
that REPETITION is non-greedy and the minimal number of repetitions is
405
zero. It is furthermore assumed that the inner regex of REPETITION is
406
of fixed length and doesn't contain registers."))
408
(defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
409
(declare #.*standard-optimize-settings*)
410
(let ((len (len repetition))
411
(maximum (maximum repetition))
412
(regex (regex repetition))
413
(min-rest (min-rest repetition)))
414
(declare (type fixnum len min-rest)
415
(type function next-fn))
417
;; inner regex has zero-length, so we can discard it
421
;; now first try to optimize for a couple of common cases
424
(let ((str (str regex)))
426
;; a single character
427
(let ((chr (schar str 0)))
428
(if (case-insensitive-p regex)
429
(non-greedy-constant-length-closure
430
(char-equal chr (schar *string* curr-pos)))
431
(non-greedy-constant-length-closure
432
(char= chr (schar *string* curr-pos)))))
434
(if (case-insensitive-p regex)
435
(non-greedy-constant-length-closure
436
(*string*-equal str curr-pos (+ curr-pos len) 0 len))
437
(non-greedy-constant-length-closure
438
(*string*= str curr-pos (+ curr-pos len) 0 len))))))
441
(insert-char-class-tester (regex (schar *string* curr-pos))
442
(if (invertedp regex)
443
(non-greedy-constant-length-closure
444
(not (char-class-test)))
445
(non-greedy-constant-length-closure
446
(char-class-test)))))
448
(if (single-line-p regex)
449
;; a dot which really can match everything; we rely
450
;; on the compiler to optimize this away
451
(non-greedy-constant-length-closure
453
;; a dot which has to watch out for #\Newline
454
(non-greedy-constant-length-closure
455
(char/= #\Newline (schar *string* curr-pos)))))
457
;; the general case - we build an inner matcher which
458
;; just checks for immediate success, i.e. NEXT-FN is
460
(let ((inner-matcher (create-matcher-aux regex #'identity)))
461
(declare (type function inner-matcher))
462
(non-greedy-constant-length-closure
463
(funcall inner-matcher curr-pos)))))))))
465
(defgeneric create-non-greedy-no-zero-matcher (repetition next-fn)
466
(declare #.*standard-optimize-settings*)
467
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
468
that REPETITION is non-greedy and the minimal number of repetitions is
469
zero. It is furthermore assumed that the inner regex of REPETITION can
470
never match a zero-length string (or instead the maximal number of
471
repetitions is 1)."))
473
(defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
474
(declare #.*standard-optimize-settings*)
475
(let ((maximum (maximum repetition))
476
;; REPEAT-MATCHER is part of the closure's environment but it
477
;; can only be defined after NON-GREEDY-AUX is defined
479
(declare (type function next-fn))
482
;; this is essentially like the next case but with a known
483
;; MAXIMUM of 1 we can get away without a counter
485
(create-matcher-aux (regex repetition) next-fn))
487
(declare (type function repeat-matcher))
488
(or (funcall next-fn start-pos)
489
(funcall repeat-matcher start-pos))))
491
;; we make a reservation for our slot in *REPEAT-COUNTERS*
492
;; because we need to keep track whether we've reached MAXIMUM
494
(let ((rep-num (incf-after *rep-num*)))
495
(flet ((non-greedy-aux (start-pos)
496
;; the actual matcher which first calls NEXT-FN and
497
;; on failure tries to match the inner regex of
498
;; REPETITION (if we haven't done so too often)
499
(declare (type fixnum start-pos maximum rep-num)
500
(type function repeat-matcher))
501
(or (funcall next-fn start-pos)
502
(and (< (aref *repeat-counters* rep-num) maximum)
503
(incf (aref *repeat-counters* rep-num))
504
;; note that REPEAT-MATCHER will call
505
;; NON-GREEDY-AUX again recursively
507
(funcall repeat-matcher start-pos)
508
(decf (aref *repeat-counters* rep-num)))))))
509
;; create a closure to match the inner regex and to
510
;; implement backtracking via NON-GREEDY-AUX
512
(create-matcher-aux (regex repetition) #'non-greedy-aux))
513
;; the closure we return is just a thin wrapper around
514
;; NON-GREEDY-AUX to initialize the repetition counter
516
(declare (type fixnum start-pos))
517
(setf (aref *repeat-counters* rep-num) 0)
518
(non-greedy-aux start-pos)))))
520
;; easier code because we're not bounded by MAXIMUM, but
521
;; basically the same
522
(flet ((non-greedy-aux (start-pos)
523
(declare (type fixnum start-pos)
524
(type function repeat-matcher))
525
(or (funcall next-fn start-pos)
526
(funcall repeat-matcher start-pos))))
528
(create-matcher-aux (regex repetition) #'non-greedy-aux))
529
#'non-greedy-aux)))))
531
(defgeneric create-non-greedy-matcher (repetition next-fn)
532
(declare #.*standard-optimize-settings*)
533
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
534
that REPETITION is non-greedy and the minimal number of repetitions is
537
(defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
538
(declare #.*standard-optimize-settings*)
539
;; we make a reservation for our slot in *LAST-POS-STORES* because
540
;; we have to watch out for endless loops as the inner regex might
541
;; match zero-length strings
542
(let ((zero-length-num (incf-after *zero-length-num*))
543
(maximum (maximum repetition))
544
;; REPEAT-MATCHER is part of the closure's environment but it
545
;; can only be defined after NON-GREEDY-AUX is defined
547
(declare (type fixnum zero-length-num)
548
(type function next-fn))
551
;; we make a reservation for our slot in *REPEAT-COUNTERS*
552
;; because we need to keep track whether we've reached MAXIMUM
554
(let ((rep-num (incf-after *rep-num*)))
555
(flet ((non-greedy-aux (start-pos)
556
;; the actual matcher which first calls NEXT-FN and
557
;; on failure tries to match the inner regex of
558
;; REPETITION (if we haven't done so too often)
559
(declare (type fixnum start-pos maximum rep-num)
560
(type function repeat-matcher))
562
(svref *last-pos-stores* zero-length-num)))
563
(when (and old-last-pos
564
(= (the fixnum old-last-pos) start-pos))
565
;; stop immediately if we've been here before,
566
;; i.e. if the last attempt matched a zero-length
568
(return-from non-greedy-aux (funcall next-fn start-pos)))
569
;; otherwise remember this position for the next
571
(setf (svref *last-pos-stores* zero-length-num) start-pos)
572
(or (funcall next-fn start-pos)
573
(and (< (aref *repeat-counters* rep-num) maximum)
574
(incf (aref *repeat-counters* rep-num))
575
;; note that REPEAT-MATCHER will call
576
;; NON-GREEDY-AUX again recursively
578
(funcall repeat-matcher start-pos)
579
(decf (aref *repeat-counters* rep-num))
580
(setf (svref *last-pos-stores* zero-length-num)
582
;; create a closure to match the inner regex and to
583
;; implement backtracking via NON-GREEDY-AUX
585
(create-matcher-aux (regex repetition) #'non-greedy-aux))
586
;; the closure we return is just a thin wrapper around
587
;; NON-GREEDY-AUX to initialize the repetition counter and our
588
;; slot in *LAST-POS-STORES*
590
(declare (type fixnum start-pos))
591
(setf (aref *repeat-counters* rep-num) 0
592
(svref *last-pos-stores* zero-length-num) nil)
593
(non-greedy-aux start-pos)))))
595
;; easier code because we're not bounded by MAXIMUM, but
596
;; basically the same
597
(flet ((non-greedy-aux (start-pos)
598
(declare (type fixnum start-pos)
599
(type function repeat-matcher))
601
(svref *last-pos-stores* zero-length-num)))
602
(when (and old-last-pos
603
(= (the fixnum old-last-pos) start-pos))
604
(return-from non-greedy-aux (funcall next-fn start-pos)))
605
(setf (svref *last-pos-stores* zero-length-num) start-pos)
606
(or (funcall next-fn start-pos)
608
(funcall repeat-matcher start-pos)
609
(setf (svref *last-pos-stores* zero-length-num)
612
(create-matcher-aux (regex repetition) #'non-greedy-aux))
614
(declare (type fixnum start-pos))
615
(setf (svref *last-pos-stores* zero-length-num) nil)
616
(non-greedy-aux start-pos)))))))
618
;; code for constant repetitions, i.e. those with a fixed number of repetitions
620
(defmacro constant-repetition-constant-length-closure (check-curr-pos)
621
"This is the template for simple constant repetitions (where simple
622
means that the inner regex to be checked is of fixed length LEN, and
623
that it doesn't contain registers, i.e. there's no need for
624
backtracking) and where constant means that MINIMUM is equal to
625
MAXIMUM. CHECK-CURR-POS is a form which checks whether the inner regex
626
of the repetition matches at CURR-POS."
628
(declare (type fixnum start-pos))
629
(let ((target-end-pos (+ start-pos
630
(the fixnum (* len repetitions)))))
631
(declare (type fixnum target-end-pos))
632
;; first check if we won't go beyond the end of the string
633
(and (>= *end-pos* target-end-pos)
634
;; then loop through all repetitions step by step
635
(loop for curr-pos of-type fixnum from start-pos
638
always ,check-curr-pos)
639
;; finally call NEXT-FN if we made it that far
640
(funcall next-fn target-end-pos)))))
642
(defgeneric create-constant-repetition-constant-length-matcher
644
(declare #.*standard-optimize-settings*)
645
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
646
that REPETITION has a constant number of repetitions. It is
647
furthermore assumed that the inner regex of REPETITION is of fixed
648
length and doesn't contain registers."))
650
(defmethod create-constant-repetition-constant-length-matcher
651
((repetition repetition) next-fn)
652
(declare #.*standard-optimize-settings*)
653
(let ((len (len repetition))
654
(repetitions (minimum repetition))
655
(regex (regex repetition)))
656
(declare (type fixnum len repetitions)
657
(type function next-fn))
659
;; if the length is zero it suffices to try once
660
(create-matcher-aux regex next-fn)
661
;; otherwise try to optimize for a couple of common cases
664
(let ((str (str regex)))
666
;; a single character
667
(let ((chr (schar str 0)))
668
(if (case-insensitive-p regex)
669
(constant-repetition-constant-length-closure
670
(and (char-equal chr (schar *string* curr-pos))
672
(constant-repetition-constant-length-closure
673
(and (char= chr (schar *string* curr-pos))
676
(if (case-insensitive-p regex)
677
(constant-repetition-constant-length-closure
678
(let ((next-pos (+ curr-pos len)))
679
(declare (type fixnum next-pos))
680
(and (*string*-equal str curr-pos next-pos 0 len)
682
(constant-repetition-constant-length-closure
683
(let ((next-pos (+ curr-pos len)))
684
(declare (type fixnum next-pos))
685
(and (*string*= str curr-pos next-pos 0 len)
689
(insert-char-class-tester (regex (schar *string* curr-pos))
690
(if (invertedp regex)
691
(constant-repetition-constant-length-closure
692
(and (not (char-class-test))
694
(constant-repetition-constant-length-closure
695
(and (char-class-test)
698
(if (single-line-p regex)
699
;; a dot which really matches everything - we just have to
700
;; advance the index into *STRING* accordingly and check
701
;; if we didn't go past the end
703
(declare (type fixnum start-pos))
704
(let ((next-pos (+ start-pos repetitions)))
705
(declare (type fixnum next-pos))
706
(and (<= next-pos *end-pos*)
707
(funcall next-fn next-pos))))
708
;; a dot which is not in single-line-mode - make sure we
709
;; don't match #\Newline
710
(constant-repetition-constant-length-closure
711
(and (char/= #\Newline (schar *string* curr-pos))
714
;; the general case - we build an inner matcher which just
715
;; checks for immediate success, i.e. NEXT-FN is #'IDENTITY
716
(let ((inner-matcher (create-matcher-aux regex #'identity)))
717
(declare (type function inner-matcher))
718
(constant-repetition-constant-length-closure
719
(funcall inner-matcher curr-pos))))))))
721
(defgeneric create-constant-repetition-matcher (repetition next-fn)
722
(declare #.*standard-optimize-settings*)
723
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
724
that REPETITION has a constant number of repetitions."))
726
(defmethod create-constant-repetition-matcher ((repetition repetition) next-fn)
727
(declare #.*standard-optimize-settings*)
728
(let ((repetitions (minimum repetition))
729
;; we make a reservation for our slot in *REPEAT-COUNTERS*
730
;; because we need to keep track of the number of repetitions
731
(rep-num (incf-after *rep-num*))
732
;; REPEAT-MATCHER is part of the closure's environment but it
733
;; can only be defined after NON-GREEDY-AUX is defined
735
(declare (type fixnum repetitions rep-num)
736
(type function next-fn))
737
(if (zerop (min-len repetition))
738
;; we make a reservation for our slot in *LAST-POS-STORES*
739
;; because we have to watch out for needless loops as the inner
740
;; regex might match zero-length strings
741
(let ((zero-length-num (incf-after *zero-length-num*)))
742
(declare (type fixnum zero-length-num))
743
(flet ((constant-aux (start-pos)
744
;; the actual matcher which first calls NEXT-FN and
745
;; on failure tries to match the inner regex of
746
;; REPETITION (if we haven't done so too often)
747
(declare (type fixnum start-pos)
748
(type function repeat-matcher))
750
(svref *last-pos-stores* zero-length-num)))
751
(when (and old-last-pos
752
(= (the fixnum old-last-pos) start-pos))
753
;; if we've been here before we matched a
754
;; zero-length string the last time, so we can
755
;; just carry on because we will definitely be
756
;; able to do this again often enough
757
(return-from constant-aux (funcall next-fn start-pos)))
758
;; otherwise remember this position for the next
760
(setf (svref *last-pos-stores* zero-length-num) start-pos)
761
(cond ((< (aref *repeat-counters* rep-num) repetitions)
762
;; not enough repetitions yet, try it again
763
(incf (aref *repeat-counters* rep-num))
764
;; note that REPEAT-MATCHER will call
765
;; CONSTANT-AUX again recursively
767
(funcall repeat-matcher start-pos)
768
(decf (aref *repeat-counters* rep-num))
769
(setf (svref *last-pos-stores* zero-length-num)
772
;; we're done - call NEXT-FN
773
(funcall next-fn start-pos))))))
774
;; create a closure to match the inner regex and to
775
;; implement backtracking via CONSTANT-AUX
777
(create-matcher-aux (regex repetition) #'constant-aux))
778
;; the closure we return is just a thin wrapper around
779
;; CONSTANT-AUX to initialize the repetition counter
781
(declare (type fixnum start-pos))
782
(setf (aref *repeat-counters* rep-num) 0
783
(aref *last-pos-stores* zero-length-num) nil)
784
(constant-aux start-pos))))
785
;; easier code because we don't have to care about zero-length
786
;; matches but basically the same
787
(flet ((constant-aux (start-pos)
788
(declare (type fixnum start-pos)
789
(type function repeat-matcher))
790
(cond ((< (aref *repeat-counters* rep-num) repetitions)
791
(incf (aref *repeat-counters* rep-num))
793
(funcall repeat-matcher start-pos)
794
(decf (aref *repeat-counters* rep-num))))
795
(t (funcall next-fn start-pos)))))
797
(create-matcher-aux (regex repetition) #'constant-aux))
799
(declare (type fixnum start-pos))
800
(setf (aref *repeat-counters* rep-num) 0)
801
(constant-aux start-pos))))))
803
;; the actual CREATE-MATCHER-AUX method for REPETITION objects which
804
;; utilizes all the functions and macros defined above
806
(defmethod create-matcher-aux ((repetition repetition) next-fn)
807
(with-slots ((minimum minimum)
812
(contains-register-p contains-register-p))
816
;; this should have been optimized away by CONVERT but just
818
(error "Got REPETITION with MAXIMUM 0 \(should not happen)"))
820
(= minimum maximum 1))
821
;; this should have been optimized away by CONVERT but just
823
(error "Got REPETITION with MAXIMUM 1 and MINIMUM 1 \(should not happen)"))
824
((and (eql minimum maximum)
826
(not contains-register-p))
827
(create-constant-repetition-constant-length-matcher repetition next-fn))
828
((eql minimum maximum)
829
(create-constant-repetition-matcher repetition next-fn))
832
(not contains-register-p))
833
(create-greedy-constant-length-matcher repetition next-fn))
837
(create-greedy-no-zero-matcher repetition next-fn))
839
(create-greedy-matcher repetition next-fn))
842
(not contains-register-p))
843
(create-non-greedy-constant-length-matcher repetition next-fn))
846
(create-non-greedy-no-zero-matcher repetition next-fn))
848
(create-non-greedy-matcher repetition next-fn)))))