Coverage report: /home/jsnell/.sbcl/site/cl-ppcre-1.2.13/lexer.lisp

Kind	Covered	All	%
expression	493	556	88.7
branch	70	76	92.1

Key

Not instrumented

Conditionalized out

Executed

Not executed

Both branches taken

One branch taken

Neither branch taken

1

;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-

2

;;; $Header: /usr/local/cvsrep/cl-ppcre/lexer.lisp,v 1.24 2005/04/01 21:29:09 edi Exp $

3

4

;;; The lexer's responsibility is to convert the regex string into a

5

;;; sequence of tokens which are in turn consumed by the parser.

6

;;;

7

;;; The lexer is aware of Perl's 'extended mode' and it also 'knows'

8

;;; (with a little help from the parser) how many register groups it

9

;;; has opened so far. (The latter is necessary for interpreting

10

;;; strings like "\\10" correctly.)

11

12

13

14

;;; Redistribution and use in source and binary forms, with or without

15

;;; modification, are permitted provided that the following conditions

16

;;; are met:

17

18

;;; * Redistributions of source code must retain the above copyright

19

;;; notice, this list of conditions and the following disclaimer.

20

21

;;; * Redistributions in binary form must reproduce the above

22

;;; copyright notice, this list of conditions and the following

23

;;; disclaimer in the documentation and/or other materials

24

;;; provided with the distribution.

25

26

;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED

27

;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

28

;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

29

;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY

30

;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

31

;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE

32

;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

33

;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,

34

;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

35

;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

36

;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

37

38

(in-package #:cl-ppcre)

39

40

(declaim (inline map-char-to-special-class))

41

(defun map-char-to-special-char-class (chr)

42

(declare #.*standard-optimize-settings*)

43

"Maps escaped characters like \"\\d\" to the tokens which represent

44

their associated character classes."

45

(case chr

46

((#\d)

47

:digit-class)

48

((#\D)

49

:non-digit-class)

50

((#\w)

51

:word-char-class)

52

((#\W)

53

:non-word-char-class)

54

((#\s)

55

:whitespace-char-class)

56

((#\S)

57

:non-whitespace-char-class)))

58

59

(locally

60

(declare #.*standard-optimize-settings*)

61

(defstruct (lexer (:constructor make-lexer-internal))

62

"LEXER structures are used to hold the regex string which is

63

currently lexed and to keep track of the lexer's state."

64

(str ""

65

:type string

66

:read-only t)

67

(len 0

68

:type fixnum

69

:read-only t)

70

(reg 0

71

:type fixnum)

72

(pos 0

73

:type fixnum)

74

(last-pos nil

75

:type list)))

76

77

(defun make-lexer (string)

78

(declare (inline make-lexer-internal)

79

#-genera (type string string))

80

(make-lexer-internal :str (maybe-coerce-to-simple-string string)

81

:len (length string)))

82

83

(declaim (inline end-of-string-p))

84

(defun end-of-string-p (lexer)

85

(declare #.*standard-optimize-settings*)

86

"Tests whether we're at the end of the regex string."

87

(<= (lexer-len lexer)

88

(lexer-pos lexer)))

89

90

(declaim (inline looking-at-p))

91

(defun looking-at-p (lexer chr)

92

(declare #.*standard-optimize-settings*)

93

"Tests whether the next character the lexer would see is CHR.

94

Does not respect extended mode."

95

(and (not (end-of-string-p lexer))

96

(char= (schar (lexer-str lexer) (lexer-pos lexer))

97

chr)))

98

99

(declaim (inline next-char-non-extended))

100

(defun next-char-non-extended (lexer)

101

(declare #.*standard-optimize-settings*)

102

"Returns the next character which is to be examined and updates the

103

POS slot. Does not respect extended mode."

104

(cond ((end-of-string-p lexer)

105

nil)

106

(t

107

(prog1

108

(schar (lexer-str lexer) (lexer-pos lexer))

109

(incf (lexer-pos lexer))))))

110

111

(defun next-char (lexer)

112

(declare #.*standard-optimize-settings*)

113

"Returns the next character which is to be examined and updates the

114

POS slot. Respects extended mode, i.e. whitespace, comments, and also

115

nested comments are skipped if applicable."

116

(let ((next-char (next-char-non-extended lexer))

117

last-loop-pos)

118

(loop

119

;; remember where we started

120

(setq last-loop-pos (lexer-pos lexer))

121

;; first we look for nested comments like (?#foo)

122

(when (and next-char

123

(char= next-char #\()

124

(looking-at-p lexer #\?))

125

(incf (lexer-pos lexer))

126

(cond ((looking-at-p lexer #\#)

127

;; must be a nested comment - so we have to search for

128

;; the closing parenthesis

129

(let ((error-pos (- (lexer-pos lexer) 2)))

130

(unless

131

;; loop 'til ')' or end of regex string and

132

;; return NIL if ')' wasn't encountered

133

(loop for skip-char = next-char

134

then (next-char-non-extended lexer)

135

while (and skip-char

136

(char/= skip-char #\)))

137

finally (return skip-char))

138

(signal-ppcre-syntax-error*

139

error-pos

140

"Comment group not closed")))

141

(setq next-char (next-char-non-extended lexer)))

142

(t

143

;; undo effect of previous INCF if we didn't see a #

144

(decf (lexer-pos lexer)))))

145

(when *extended-mode-p*

146

;; now - if we're in extended mode - we skip whitespace and

147

;; comments; repeat the following loop while we look at

148

;; whitespace or #\#

149

(loop while (and next-char

150

(or (char= next-char #\#)

151

(whitespacep next-char)))

152

do (setq next-char

153

(if (char= next-char #\#)

154

;; if we saw a comment marker skip until

155

;; we're behind #\Newline...

156

(loop for skip-char = next-char

157

then (next-char-non-extended lexer)

158

while (and skip-char

159

                                             (char/= skip-char #\Newline))

160

finally (return (next-char-non-extended lexer)))

161

;; ...otherwise (whitespace) skip until we

162

;; see the next non-whitespace character

163

(loop for skip-char = next-char

164

then (next-char-non-extended lexer)

165

while (and skip-char

166

(whitespacep skip-char))

167

finally (return skip-char))))))

168

;; if the position has moved we have to repeat our tests

169

;; because of cases like /^a (?#xxx) (?#yyy) {3}c/x which

170

;; would be equivalent to /^a{3}c/ in Perl

171

(unless (> (lexer-pos lexer) last-loop-pos)

172

(return next-char)))))

173

174

(declaim (inline fail))

175

(defun fail (lexer)

176

(declare #.*standard-optimize-settings*)

177

"Moves (LEXER-POS LEXER) back to the last position stored in

178

\(LEXER-LAST-POS LEXER) and pops the LAST-POS stack."

179

(unless (lexer-last-pos lexer)

180

(signal-ppcre-syntax-error "LAST-POS stack of LEXER ~A is empty" lexer))

181

(setf (lexer-pos lexer) (pop (lexer-last-pos lexer)))

182

nil)

183

184

(defun get-number (lexer &key (radix 10) max-length no-whitespace-p)

185

(declare #.*standard-optimize-settings*)

186

"Read and consume the number the lexer is currently looking at and

187

return it. Returns NIL if no number could be identified.

188

RADIX is used as in PARSE-INTEGER. If MAX-LENGTH is not NIL we'll read

189

at most the next MAX-LENGTH characters. If NO-WHITESPACE-P is not NIL

190

we don't tolerate whitespace in front of the number."

191

(when (or (end-of-string-p lexer)

192

(and no-whitespace-p

193

(whitespacep (schar (lexer-str lexer) (lexer-pos lexer)))))

194

(return-from get-number nil))

195

(multiple-value-bind (integer new-pos)

196

(parse-integer (lexer-str lexer)

197

:start (lexer-pos lexer)

198

:end (if max-length

199

(let ((end-pos (+ (lexer-pos lexer)

200

                                               (the fixnum max-length)))

201

(lexer-len (lexer-len lexer)))

202

(if (< end-pos lexer-len)

203

end-pos

204

lexer-len))

205

(lexer-len lexer))

206

:radix radix

207

:junk-allowed t)

208

(cond ((and integer (>= (the fixnum integer) 0))

209

(setf (lexer-pos lexer) new-pos)

210

integer)

211

(t nil))))

212

213

(declaim (inline try-number))

214

(defun try-number (lexer &key (radix 10) max-length no-whitespace-p)

215

(declare #.*standard-optimize-settings*)

216

"Like GET-NUMBER but won't consume anything if no number is seen."

217

;; remember current position

218

(push (lexer-pos lexer) (lexer-last-pos lexer))

219

(let ((number (get-number lexer

220

:radix radix

221

:max-length max-length

222

:no-whitespace-p no-whitespace-p)))

223

(or number (fail lexer))))

224

225

(declaim (inline make-char-from-code))

226

(defun make-char-from-code (number error-pos)

227

(declare #.*standard-optimize-settings*)

228

"Create character from char-code NUMBER. NUMBER can be NIL

229

which is interpreted as 0. ERROR-POS is the position where

230

the corresponding number started within the regex string."

231

;; only look at rightmost eight bits in compliance with Perl

232

(let ((code (logand #o377 (the fixnum (or number 0)))))

233

(or (and (< code char-code-limit)

234

(code-char code))

235

(signal-ppcre-syntax-error*

236

error-pos

237

"No character for hex-code ~X"

238

number))))

239

240

(defun unescape-char (lexer)

241

(declare #.*standard-optimize-settings*)

242

"Convert the characters(s) following a backslash into a token

243

which is returned. This function is to be called when the backslash

244

has already been consumed. Special character classes like \\W are

245

handled elsewhere."

246

(when (end-of-string-p lexer)

247

(signal-ppcre-syntax-error "String ends with backslash"))

248

(let ((chr (next-char-non-extended lexer)))

249

(case chr

250

((#\E)

251

;; if \Q quoting is on this is ignored, otherwise it's just an

252

;; #\E

253

(if *allow-quoting*

254

:void

255

#\E))

256

((#\c)

257

;; \cx means control-x in Perl

258

(let ((next-char (next-char-non-extended lexer)))

259

(unless next-char

260

(signal-ppcre-syntax-error*

261

(lexer-pos lexer)

262

"Character missing after '\\c' at position ~A"))

263

(code-char (logxor #x40 (char-code (char-upcase next-char))))))

264

((#\x)

265

;; \x should be followed by a hexadecimal char code,

266

;; two digits or less

267

(let* ((error-pos (lexer-pos lexer))

268

(number (get-number lexer :radix 16 :max-length 2 :no-whitespace-p t)))

269

;; note that it is OK if \x is followed by zero digits

270

(make-char-from-code number error-pos)))

271

((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)

272

;; \x should be followed by an octal char code,

273

;; three digits or less

274

(let* ((error-pos (decf (lexer-pos lexer)))

275

(number (get-number lexer :radix 8 :max-length 3)))

276

(make-char-from-code number error-pos)))

277

;; the following five character names are 'semi-standard'

278

;; according to the CLHS but I'm not aware of any implementation

279

;; that doesn't implement them

280

((#\t)

281

#\Tab)

282

((#\n)

283

#\Newline)

284

((#\r)

285

#\Return)

286

((#\f)

287

#\Page)

288

((#\b)

289

#\Backspace)

290

((#\a)

291

(code-char 7)) ; ASCII bell

292

((#\e)

293

(code-char 27)) ; ASCII escape

294

(otherwise

295

;; all other characters aren't affected by a backslash

296

chr))))

297

298

(defun collect-char-class (lexer)

299

(declare #.*standard-optimize-settings*)

300

"Reads and consumes characters from regex string until a right

301

bracket is seen. Assembles them into a list \(which is returned) of

302

characters, character ranges, like \(:RANGE #\\A #\\E) for a-e, and

303

tokens representing special character classes."

304

(let ((start-pos (lexer-pos lexer)) ; remember start for error message

305

hyphen-seen

306

last-char

307

list)

308

(flet ((handle-char (c)

309

"Do the right thing with character C depending on whether

310

we're inside a range or not."

311

(cond ((and hyphen-seen last-char)

312

(setf (car list) (list :range last-char c)

313

last-char nil))

314

(t

315

(push c list)

316

(setq last-char c)))

317

(setq hyphen-seen nil)))

318

(loop for first = t then nil

319

for c = (next-char-non-extended lexer)

320

;; leave loop if at end of string

321

while c

322

do (cond

323

((char= c #\\)

324

;; we've seen a backslash

325

(let ((next-char (next-char-non-extended lexer)))

326

(case next-char

327

((#\d #\D #\w #\W #\s #\S)

328

;; a special character class

329

(push (map-char-to-special-char-class next-char) list)

330

;; if the last character was a hyphen

331

;; just collect it literally

332

(when hyphen-seen

333

(push #\- list))

334

;; if the next character is a hyphen do the same

335

(when (looking-at-p lexer #\-)

336

(push #\- list)

337

(incf (lexer-pos lexer)))

338

(setq hyphen-seen nil))

339

((#\E)

340

;; if \Q quoting is on we ignore \E,

341

;; otherwise it's just a plain #\E

342

(unless *allow-quoting*

343

(handle-char #\E)))

344

(otherwise

345

;; otherwise unescape the following character(s)

346

(decf (lexer-pos lexer))

347

(handle-char (unescape-char lexer))))))

348

(first

349

;; the first character must not be a right bracket

350

;; and isn't treated specially if it's a hyphen

351

(handle-char c))

352

((char= c #\])

353

;; end of character class

354

;; make sure we collect a pending hyphen

355

(when hyphen-seen

356

(setq hyphen-seen nil)

357

(handle-char #\-))

358

;; reverse the list to preserve the order intended

359

;; by the author of the regex string

360

(return-from collect-char-class (nreverse list)))

361

((and (char= c #\-)

362

last-char

363

(not hyphen-seen))

364

;; if the last character was 'just a character'

365

;; we expect to be in the middle of a range

366

(setq hyphen-seen t))

367

((char= c #\-)

368

;; otherwise this is just an ordinary hyphen

369

(handle-char #\-))

370

(t

371

;; default case - just collect the character

372

(handle-char c))))

373

;; we can only exit the loop normally if we've reached the end

374

;; of the regex string without seeing a right bracket

375

(signal-ppcre-syntax-error*

376

start-pos

377

"Missing right bracket to close character class"))))

378

379

(defun maybe-parse-flags (lexer)

380

(declare #.*standard-optimize-settings*)

381

"Reads a sequence of modifiers \(including #\\- to reverse their

382

meaning) and returns a corresponding list of \"flag\" tokens. The

383

\"x\" modifier is treated specially in that it dynamically modifies

384

the behaviour of the lexer itself via the special variable

385

*EXTENDED-MODE-P*."

386

(prog1

387

(loop with set = t

388

for chr = (next-char-non-extended lexer)

389

unless chr

390

do (signal-ppcre-syntax-error "Unexpected end of string")

391

while (find chr "-imsx" :test #'char=)

392

;; the first #\- will invert the meaning of all modifiers

393

;; following it

394

if (char= chr #\-)

395

do (setq set nil)

396

else if (char= chr #\x)

397

do (setq *extended-mode-p* set)

398

else collect (if set

399

(case chr

400

((#\i)

401

:case-insensitive-p)

402

((#\m)

403

:multi-line-mode-p)

404

((#\s)

405

:single-line-mode-p))

406

(case chr

407

((#\i)

408

:case-sensitive-p)

409

((#\m)

410

:not-multi-line-mode-p)

411

((#\s)

412

:not-single-line-mode-p))))

413

(decf (lexer-pos lexer))))

414

415

(defun get-quantifier (lexer)

416

(declare #.*standard-optimize-settings*)

417

"Returns a list of two values (min max) if what the lexer is looking

418

at can be interpreted as a quantifier. Otherwise returns NIL and

419

resets the lexer to its old position."

420

;; remember starting position for FAIL and UNGET-TOKEN functions

421

(push (lexer-pos lexer) (lexer-last-pos lexer))

422

(let ((next-char (next-char lexer)))

423

(case next-char

424

((#\*)

425

;; * (Kleene star): match 0 or more times

426

'(0 nil))

427

((#\+)

428

;; +: match 1 or more times

429

'(1 nil))

430

((#\?)

431

;; ?: match 0 or 1 times

432

'(0 1))

433

((#\{)

434

;; one of

435

;; {n}: match exactly n times

436

;; {n,}: match at least n times

437

;; {n,m}: match at least n but not more than m times

438

;; note that anything not matching one of these patterns will

439

;; be interpreted literally - even whitespace isn't allowed

440

(let ((num1 (get-number lexer :no-whitespace-p t)))

441

(if num1

442

(let ((next-char (next-char-non-extended lexer)))

443

(case next-char

444

((#\,)

445

(let* ((num2 (get-number lexer :no-whitespace-p t))

446

(next-char (next-char-non-extended lexer)))

447

(case next-char

448

((#\})

449

;; this is the case {n,} (NUM2 is NIL) or {n,m}

450

(list num1 num2))

451

(otherwise

452

(fail lexer)))))

453

((#\})

454

;; this is the case {n}

455

(list num1 num1))

456

(otherwise

457

(fail lexer))))

458

;; no number following left curly brace, so we treat it

459

;; like a normal character

460

(fail lexer))))

461

;; cannot be a quantifier

462

(otherwise

463

(fail lexer)))))

464

465

(defun get-token (lexer)

466

(declare #.*standard-optimize-settings*)

467

"Returns and consumes the next token from the regex string (or NIL)."

468

;; remember starting position for UNGET-TOKEN function

469

(push (lexer-pos lexer)

470

(lexer-last-pos lexer))

471

(let ((next-char (next-char lexer)))

472

(cond (next-char

473

(case next-char

474

;; the easy cases first - the following six characters

475

;; always have a special meaning and get translated

476

;; into tokens immediately

477

((#\))

478

:close-paren)

479

((#\|)

480

:vertical-bar)

481

((#\?)

482

:question-mark)

483

((#\.)

484

:everything)

485

((#\^)

486

:start-anchor)

487

((#\$)

488

:end-anchor)

489

((#\+ #\*)

490

;; quantifiers will always be consumend by

491

;; GET-QUANTIFIER, they must not appear here

492

(signal-ppcre-syntax-error*

493

(1- (lexer-pos lexer))

494

"Quantifier '~A' not allowed"

495

next-char))

496

((#\{)

497

;; left brace isn't a special character in it's own

498

;; right but we must check if what follows might

499

;; look like a quantifier

500

(let ((this-pos (lexer-pos lexer))

501

(this-last-pos (lexer-last-pos lexer)))

502

(unget-token lexer)

503

(when (get-quantifier lexer)

504

(signal-ppcre-syntax-error*

505

(car this-last-pos)

506

"Quantifier '~A' not allowed"

507

(subseq (lexer-str lexer)

508

(car this-last-pos)

509

(lexer-pos lexer))))

510

(setf (lexer-pos lexer) this-pos

511

(lexer-last-pos lexer) this-last-pos)

512

next-char))

513

((#\[)

514

;; left bracket always starts a character class

515

(cons (cond ((looking-at-p lexer #\^)

516

(incf (lexer-pos lexer))

517

:inverted-char-class)

518

(t

519

:char-class))

520

(collect-char-class lexer)))

521

((#\\)

522

;; backslash might mean different things so we have

523

;; to peek one char ahead:

524

(let ((next-char (next-char-non-extended lexer)))

525

(case next-char

526

((#\A)

527

:modeless-start-anchor)

528

((#\Z)

529

:modeless-end-anchor)

530

((#\z)

531

:modeless-end-anchor-no-newline)

532

((#\b)

533

:word-boundary)

534

((#\B)

535

:non-word-boundary)

536

((#\d #\D #\w #\W #\s #\S)

537

;; these will be treated like character classes

538

(map-char-to-special-char-class next-char))

539

((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)

540

;; uh, a digit...

541

(let* ((old-pos (decf (lexer-pos lexer)))

542

;; ...so let's get the whole number first

543

(backref-number (get-number lexer)))

544

(declare (type fixnum backref-number))

545

(cond ((and (> backref-number (lexer-reg lexer))

546

(<= 10 backref-number))

547

;; \10 and higher are treated as octal

548

;; character codes if we haven't

549

;; opened that much register groups

550

;; yet

551

(setf (lexer-pos lexer) old-pos)

552

;; re-read the number from the old

553

;; position and convert it to its

554

;; corresponding character

555

                                 (make-char-from-code (get-number lexer :radix 8 :max-length 3)

556

old-pos))

557

(t

558

;; otherwise this must refer to a

559

;; backreference

560

(list :back-reference backref-number)))))

561

((#\0)

562

;; this always means an octal character code

563

;; (at most three digits)

564

(let ((old-pos (decf (lexer-pos lexer))))

565

(make-char-from-code (get-number lexer :radix 8 :max-length 3)

566

old-pos)))

567

(otherwise

568

;; in all other cases just unescape the

569

;; character

570

(decf (lexer-pos lexer))

571

(unescape-char lexer)))))

572

((#\()

573

;; an open parenthesis might mean different things

574

;; depending on what follows...

575

(cond ((looking-at-p lexer #\?)

576

;; this is the case '(?' (and probably more behind)

577

(incf (lexer-pos lexer))

578

;; we have to check for modifiers first

579

;; because a colon might follow

580

(let* ((flags (maybe-parse-flags lexer))

581

(next-char (next-char-non-extended lexer)))

582

;; modifiers are only allowed if a colon

583

;; or a closing parenthesis are following

584

(when (and flags

585

                                      (not (find next-char ":)" :test #'char=)))

586

(signal-ppcre-syntax-error*

587

(car (lexer-last-pos lexer))

588

"Sequence '~A' not recognized"

589

(subseq (lexer-str lexer)

590

(car (lexer-last-pos lexer))

591

(lexer-pos lexer))))

592

(case next-char

593

((nil)

594

;; syntax error

595

(signal-ppcre-syntax-error

596

"End of string following '(?'"))

597

((#\))

598

;; an empty group except for the flags

599

;; (if there are any)

600

(or (and flags

601

(cons :flags flags))

602

:void))

603

((#\()

604

;; branch

605

:open-paren-paren)

606

((#\>)

607

;; standalone

608

:open-paren-greater)

609

((#\=)

610

;; positive look-ahead

611

:open-paren-equal)

612

((#\!)

613

;; negative look-ahead

614

:open-paren-exclamation)

615

((#\:)

616

;; non-capturing group - return flags as

617

;; second value

618

(values :open-paren-colon flags))

619

((#\<)

620

;; might be a look-behind assertion, so

621

;; check next character

622

(let ((next-char (next-char-non-extended lexer)))

623

(case next-char

624

((#\=)

625

;; positive look-behind

626

:open-paren-less-equal)

627

((#\!)

628

;; negative look-behind

629

:open-paren-less-exclamation)

630

((#\))

631

;; Perl allows "(?<)" and treats

632

;; it like a null string

633

:void)

634

((nil)

635

;; syntax error

636

(signal-ppcre-syntax-error

637

"End of string following '(?<'"))

638

(t

639

;; also syntax error

640

(signal-ppcre-syntax-error*

641

(1- (lexer-pos lexer))

642

"Character '~A' may not follow '(?<'"

643

next-char )))))

644

(otherwise

645

(signal-ppcre-syntax-error*

646

(1- (lexer-pos lexer))

647

"Character '~A' may not follow '(?'"

648

next-char)))))

649

(t

650

;; if next-char was not #\? (this is within

651

;; the first COND), we've just seen an opening

652

;; parenthesis and leave it like that

653

:open-paren)))

654

(otherwise

655

;; all other characters are their own tokens

656

next-char)))

657

;; we didn't get a character (this if the "else" branch from

658

;; the first IF), so we don't return a token but NIL

659

(t

660

(pop (lexer-last-pos lexer))

661

nil))))

662

663

(declaim (inline unget-token))

664

(defun unget-token (lexer)

665

(declare #.*standard-optimize-settings*)

666

"Moves the lexer back to the last position stored in the LAST-POS stack."

667

(if (lexer-last-pos lexer)

668

(setf (lexer-pos lexer)

669

(pop (lexer-last-pos lexer)))

670

(error "No token to unget \(this should not happen)")))

671

672

(declaim (inline start-of-subexpr-p))

673

(defun start-of-subexpr-p (lexer)

674

(declare #.*standard-optimize-settings*)

675

"Tests whether the next token can start a valid sub-expression, i.e.

676

a stand-alone regex."

677

(let* ((pos (lexer-pos lexer))

678

(next-char (next-char lexer)))

679

(not (or (null next-char)

680

(prog1

681

(member (the character next-char)

682

'(#\) #\|)

683

:test #'char=)

684

(setf (lexer-pos lexer) pos))))))