\(···\) 137
\<···\> 21, 25, 50, 133-134, 150
egrep 15
Emacs 101
mimicking in Perl 341-342
\+ 141
\? 141
‘\+’ history 87
\0 117-118
(see also backreferences)
Perl 41
(see also enhanced line-anchor mode)
optimization 246
\a 115-116
\B 134
(see also: word boundaries; backspace)
backspace and word boundary 44, 46
Java 368
Perl 286
PHP 442
\b\B 240
\C 120
PHP 442
Perl 288
PHP 442
\E 290
(see also literal-text mode)
\f 115-116
introduced 44
\G 130-133, 212, 315-316, 362, 447
(see also pos)
.NET 408
optimization 246
\kname (see named capture)
\l 290
\L···\E 290
inhibiting 292
introduced 44
machine-dependency 115
\N{LATIN SMALL LETTER SHARP S} 290
\N{name} 290
(see also pragma)
inhibiting 292
\p{^ ···} 288
(see also Unicode, properties)
Perl 125
\p{All} 125
Perl 288
\p{all} 369
Perl 288
\p{Arrows} 124
\p{Assigned} 125-126
Perl 288
\p{Basic_Latin} 124
\p{Box_Drawing} 124
\p{C} 122
Java 369
\p{Cc} 123
\p{Cf} 123
\p{Cherokee} 122
\p{Close_Punctuation} 123
Java 369
\p{Co} 123
\p{Connector_Punctuation} 123
\p{Control} 123
\p{Currency} 124
\p{Currency_Symbol} 123
\p{Dash_Punctuation} 123
\p{Decimal_Digit_Number} 123
\p{Dingbats} 124
\p{Enclosing_Mark} 123
\p{Final_Punctuation} 123
\p{Format} 123
\p{Gujarati} 122
\p{Han} 122
\p{Hangul_Jamo} 124
\p{Hiragana} 122
\p{InArrows} 124
\p{InBasic_Latin} 124
\p{InBox_Drawing} 124
\p{InCurrency} 124
\p{InCyrillic} 124
\p{InDingbats} 124
\p{InHangul_Jamo} 124
\p{InHebrew} 124
\p{Inherited} 122
\p{Initial_Punctuation} 123
\p{InKatakana} 124
\p{InTamil} 124
\p{InTibetan} 124
\p{IsCherokee} 122
\p{IsCommon} 122
\p{IsCyrillic} 122
\p{IsGujarati} 122
\p{IsHan} 122
\p{IsHebrew} 122
\p{IsHiragana} 122
\p{IsKatakana} 122
\p{IsLatin} 122
\p{IsThai} 122
\p{IsTibetan} 124
\p{javaJavaIdentifierStart} 369
\pL PHP 442
Java 369
Perl 288
\p{Latin} 122
\p{Letter_Number} 123
\p{Line_Separator} 123
\p{Lowercase_Letter} 123
\p{Mark} 122
\p{Math_Symbol} 123
\p{Mc} 123
\p{Me} 123
\p{Mn} 123
\p{Modifier_Letter} 123
\p{Modifier_Symbol} 123
\pN PHP 442
\p{Nl} 123
\p{No} 123
\p{Non_Spacing_Mark} 123
\p{Number} 122
\p{Open_Punctuation} 123
\p{Other} 122
\p{Other_Letter} 123
\p{Other_Number} 123
\p{Other_Punctuation} 123
\p{Other_Symbol} 123
\p{P} 122
\p{Paragraph_Separator} 123
\p{Pd} 123
\p{Pe} 123
\p{Pf} 123
Java 369
\p{Pi} 123
Java 369
\p{Po} 123
\p{Private_Use} 123
\p{Ps} 123
\p{Punctuation} 122
\p{S} 122
\p{Sc} 123-124
\p{Separator} 122
\p{Sk} 123
\p{Sm} 123
\p{So} 123
\p{Space_Separator} 123
\p{Spacing_Combining_Mark} 123
\p{Symbol} 122
\p{Tamil} 124
\p{Thai} 122
\p{Tibetan} 124
\p{Titlecase_Letter} 123
Perl 288
\p{Uppercase_Letter} 123
\pZ PHP 442
\p{Zl} 123
\p{Zp} 123
\p{Zs} 123
\Q···\E 290
inhibiting 292
machine-dependency 115
Emacs 128
introduction 47
Perl 288
PHP 442
introduced 44
\U 117
\U···\E 290
inhibiting 292
\V 364
Emacs 129
Java 368
many different interpretations 93
Perl 288
PHP 442
Perl 286
(see also enhanced line-anchor mode)
Java 370
optimization 246
(see also enhanced line-anchor mode)
optimization 246
PHP 442
// 322
/e 319-321
/g 61, 132, 307, 311-312, 315, 319
(see also \G)
introduced 51
with regex object 354
/i 135
(see also: case-insensitive mode; mode modifier)
introduced 47
with study 359
/m 135
(see also: enhanced line-anchor mode; mode modifier)
/o 352-353
with regex object 354
/osmosis 293
/s 135
(see also: dot-matches-all mode; mode modifier)
(see also: comments and free-spacing mode; mode modifier)
history 90
introduced 72
-Dr 363
-i as -y 86
-Mre=debug (see use re 'debug')
-y old grep 86
<> 54
and $_ 79
<br/> 481
!~ 309
# (see comments)
$& 299-300
checking for 358
naughty 356
.NET 424
OK for debugging 331
pre-match copy 355
example 202
$' 300
checking for 358
mimicking 357
naughty 356
.NET 424
OK for debugging 331
pre-match copy 355
$* 362
Perl 35
$` 300
checking for 358
mimicking 357
naughty 356
.NET 424
OK for debugging 331
pre-match copy 355
$$ .NET 424
(see also enhanced line-anchor mode)
escaping 77
Java 370
optimization 246
Perl interpolation 289
PHP 442
$_ 79, 308, 311, 314, 318, 322, 353-354, 359
.NET 424
$+[0] (see @+)
$0 300
Java 380
PHP 459
$-[0] (see @-)
${0} 459
introduced 41
Java 380
.NET 424
in other languages 138
pre-match copy 355
$all_matches 455
$ARGV 79
$HostnameRegex 76, 137, 303, 351
$matches 450
${name} 409
${name~} 424
$^W 297
% Perl interpolation 289
(?:···) (see non-capturing parentheses)
(···) (see parentheses)
(?1)
Java 402
PCRE 476
PHP 476
(?1) PHP 482
(?i) (see: case-insensitive mode; mode modifier)
(?i:···) (see mode-modified span)
⌈(?-i)⌋ 446
⌈(?i)⌋ 446
(?if then | else) (see conditional)
(?m) (see: enhanced line-anchor mode; mode modifier)
(?m:···) (see mode-modified span)
(?n) 408
(?<name>···) (see named capture)
(?'name'···) (see named capture)
(?P=name···) (see named capture)
(?P<name>···) (see named capture)
(?R) 475
PCRE 475
PHP 475
(?s) (see: dot-matches-all mode; mode modifier)
(?s:···) (see mode-modified span)
(?x:···) (see mode-modified span)
(?x) (see: comments and free-spacing mode; mode modifier)
*+ (see possessive quantifiers)
* (see star)
+ (see plus)
++ 483
(see also possessive quantifiers)
".*" (see double-quoted string example)
.*
introduced 55
mechanics of matching 152
optimization 246
warning about 56
.NET
$+ 202
after-match data 138
benchmarking 237
character-class subtraction 406
code example 219
flavor overview 92
JIT 410
line anchors 130
literal-text mode 136
MSIL 410
object model 417
\p{···} 125
regex approach 96-97
regex flavor 407
search and replace 414, 423-424
URL example 204
version covered 405
word boundaries 134
(see also VB.NET)
introduced 38
? (see question mark)
?···? 308
?+ (see possessive quantifiers)
@"···" 103
@ Perl interpolation 289
[=···=] 128
[:···:] 127
[:<:] 91
[.···.] 128
(see also enhanced line-anchor mode)
Java 370
optimization 246
^Subject: example 94, 151-152, 154, 242-243, 245, 289
Java 95
.NET 96
Perl 55
Perl debugger 361
PHP 97
Python 97
| (see alternation)
$+ .NET 202
\0 117-118
$0 300
Java 380
PHP 459
(?1)
Java 402
PCRE 476
PHP 476
(see also backreferences)
Perl 41
introduced 41
Java 380
.NET 424
in other languages 138
pre-match copy 355
(?1) PHP 482
8859-1 encoding 29, 87, 106, 108, 123
\a 115-116
@ escaping 77
(see also enhanced line-anchor mode)
optimization 246
after-match data
Java 138
.NET 138
PHP 138
after-match variables
Perl 299
pre-match copy 355
\p{all} 369
\p{All} 125
Perl 288
$all_matches 455
collated 455
vs. $matches 454
stacked 456
alternation 139-140
and backtracking 231
greedy 174-175
hand tweaking 261
introduced 13-14
order of 175-177, 223, 260, 482
for efficiency 224
and parentheses 13
analogy
backtracking
bread crumbs 158-159
stacking dishes 159
ball rolling 262
building a car 31
charging batteries 179
engines 143-147
first come, first served 153
gas additive 150
learning regexes
Pascal 36
playing rummy 33
regex as filename patterns 4
regex-directed match (see NFA)
text-directed match (see DFA)
transparencies (Perl’s local) 298
anchor (see also: word boundaries; enhanced line-anchor mode)
caret 129
dollar 129
end-of-line optimization 246
exposing 256
overview 129
anchored(···) 362
anchored ‘string’ 362
anchoring bounds 388
Java 388
AND class set operations 125-126
ANSI escape sequences 79
Perl 288
any character (see dot)
appendReplacement method 380
appendTail method 381
$ARGV 79
array context (see list context)
\p{Arrows} 124
ASCII encoding 29, 106-107, 115, 123
Asian character encoding 29
AssemblyName 435
\p{Assigned} 125-126
Perl 288
asterisk (see star)
atomic grouping (see also possessive quantifiers)
details 170-172
for efficiency 171-172, 259-260, 268-270
essence 170-171
introduced 139
atomic grouping example 198, 201, 213, 271, 330, 340-341, 346
AT&T Bell Labs 86
author email xxiii
auto-lookaheadification 410
automatic possessification 251
awk
after-match data 138
gensub 182
history 87
search and replace 100
version covered 91
word boundaries 134
\b
(see also: word boundaries; backspace)
backspace and word boundary 44, 46
Java 368
Perl 286
PHP 442
\B 134
\b\B 240
<B>···</B> 165-167
unrolling 270
backreferences
introduced with egrep 20-22
vs. octal escape 412-413
remembering text 21
backspace (see \b)
backtracking 163-177
and alternation 231
avoiding 171-172
computing count 227
detecting excessive 249-250
efficiency 179-180
essence 168-169
exponential match 226
global view 228-232
introduction 157-163
LIFO 159
of lookaround 173-174
neverending match 226
non-match example 160-161
POSIX NFA example 229
saved states 159
simple example 160
simple lazy example 161
balanced constructs 328-331, 340-341, 436, 475-478, 481
balancing regex issues 186
Barwise, J. 85
Basic Regular Expressions 87-88
\p{Basic_Latin} 124
\b\B 240
benchmarking 232-239
comparative 249
compile caching 351
Java 235-236
for naughty variables 358
with neverending match 227
Perl 360
PHP 234-235
pre-match copy 356
Python 238-239
Ruby 238
Tcl 239
Berkeley 86
Better-Late-Than-Never 236
<B>···</B> 165-167
unrolling 270
blocks 124, 288, 369, 402, 407
BLTN 236
Java 236
BOL 362
\p{Box_Drawing} 124
brace (see interval)
bracket expressions 127
BRE 87-88
bread-crumb analogy 158-159
<br/> 481
bugs Java 365, 368-369, 387, 392, 399, 403
Bulletin of Math. Biophysics 85
bump-along
avoiding 210
distrusting 215-218
introduction 148-149
optimization 255
in overall processing 242
Byington, Ryan xxiv
byte matching 120, 442, 452-453, 456
¢ 124
\p{C} 122
Java 369
\C 120
PHP 442
C# (see also .NET)
strings 103
C comments
matching 272-276
unrolling 275-276
caching 242-245
(see also regex objects)
benchmarking 351
compile 242-245
Emacs 244
integrated 243
Java 478
.NET 432
object-oriented 244
Perl 350-352
PHP 478
procedural 244
Tcl 244
unconditional 350
Capture 437
CaptureCollection 438
capturing parentheses Java 377
car analogy 83-84
caret anchor introduced 8
case title 110
inhibiting 292
case-insensitive mode 110
egrep 14-15
/i 47
introduced 14-15
Ruby 110
with study 359
cast 294-295
categories (see Unicode, properties)
\p{Cc} 123
CDATA 483
Celsius (see temperature conversion example)
\p{Cf} 123
chaining (of methods) 389
character
base 120
classes xvii
(see also character class)
Inherited script 122
vs. combining characters 107
control 117
initial character discrimination 245-248, 252, 257-259, 332, 361
machine-dependent codes 115
multiple code points 108
as opposed to byte 29
separating with split 322
shorthands 115-116
character class 118
vs. alternation 13
vs. dot 119
elimination optimization 248
introduced 9-10
and lazy quantifiers 167
mechanics of matching 149
must match character 11-12
and newline 119
Tcl 112
positive assertion 119
of POSIX bracket expression 127
as separate language 10
set operations 125-127
subtraction 406
subtraction (set) 126
subtraction (simple) 125
character equivalent 128
character-class subtraction .NET 406
charnames pragma 290
CharSequence 365, 373, 382, 397
CheckNaughtiness 358
\p{Cherokee} 122
Chinese text processing 29
chr 420
chunk limit
Java 396
Perl 323
PHP 466
CJKV Information Processing 29
class xvii
initial class discrimination 245-248, 252, 257-259, 332, 361
(see also character class)
Click, Cliff xxiv
client VM 236
clock clicks 239
\p{Close_Punctuation} 123
closures 339
Java 369
\p{Co} 123
code example
Java 81, 209, 217, 235, 371, 375, 378-379, 381-384, 389
.NET 219
code point
beyond U+FFFF 109
introduced 107
multiple 108
unassigned in block 124
coerce 294-295
cold VM 236
collated data 455
collating sequences 128
Inherited script 122
commafying a number example 64-65
introduced 59
without lookbehind 67
COMMAND.COM 7
comments
Java 98
matching of C comments 272-276
matching of Pascal comments 265
.NET 420
XML 483
comments and free-spacing mode 111
Communications of the ACM 85
comparison of engine types (see NFA)
Compilation failed 474
compile
caching 242-245
once (/o) 352-353
on-demand 351
regex 410-411
compile method 372
Compiled (.NET) 237, 408, 410, 420, 427-428, 435
Compilers — Principles, Techniques, and Tools 180
conditional 140-141
mimicking with lookaround 140
.NET 409-410
conflicting metacharacters 44-46
\p{Connector_Punctuation} 123
Constable, Robert 85
context (see also: list context; scalar context; match, context)
contorting
Perl 294
forcing 310
metacharacters 44-46
regex use 189
continuation lines 178, 186-187
unrolling 270-271
contorting an expression 294-295
\p{Control} 123
control characters 117
Conway, Damian 339
copy for $& (see pre-match copy)
correctness vs. efficiency 223-224
counting quantifier (see interval)
www.cpan.org 358
CR/LF 370
Cruise, Tom 51
crummy analogy 158-159
CSV parsing example
.NET 435
Perl 213-219
PHP 480
unrolling 271
VB.NET 219
\p{Currency} 124
currency
\p{Currency} 124
\p{Currency_Symbol} 123
\p{Sc} 123
Unicode block 123-124
\p{Currency_Symbol} 123
current location Java 374, 383, 398, 400
currentTimeMillis() 236
Perl 288
PHP 442
Darth 197
dash in character class 9
\p{Dash_Punctuation} 123
date_default_timezone_set 235
DBIx::DWIW 258
debugcolor 363
debugging 361-363
with embedded code 331-332
regex objects 305-306
run-time 362
\p{Decimal_Digit_Number} 123
default regex 308
define-key 101
delegate 423-424
delimited text 196-198
delimiter
with shell 7
with substitution 319
description Java 365
Deterministic Finite Automaton (see DFA)
Devel::FindAmpersand 358
Devel::SawAmpersand 358
DFA
acronym spelled out 156
boring 157
(see also NFA)
efficiency 179
implementation ease 183
lazy evaluation 181
longest-leftmost match 177-179
testing for 146-147
dialytika 108
\p{Dingbats} 124
directed alternation (see alternation, ordered)
dish-stacking analogy 159
dollar for Perl variable 37
dollar anchor 129
introduced 8
dollar value example 24-25, 51-52, 167-170, 175, 194-195
DOS 7
dot 119
vs. character class 119
introduced 11-12
Java 370
mechanics of matching 149
Tcl 113
dot 370
$+ 202
after-match data 138
benchmarking 237
character-class subtraction 406
code example 219
flavor overview 92
JIT 410
line anchors 130
literal-text mode 136
MSIL 410
object model 417
\p{···} 125
regex approach 96-97
regex flavor 407
search and replace 414, 423-424
URL example 204
version covered 405
word boundaries 134
dot-matches-all mode 111-112
double-quoted string example
allowing escaped quotes 196
egrep 24
final regex 264
makudonarudo 165, 169, 228-232, 264
sobering example 222-228
double-word finder example 81
description 1
egrep 22
Emacs 101
Java 81
-Dr 363
dragon book 180
DWIW (DBIx) 258
dynamic regex 327-331
sanitizing 337
dynamic scope 295-299
vs. lexical scope 299
\E 290
(see also literal-text mode)
/e 319-321
earliest match wins 148-149
EBCDIC 29
ECMAScript (.NET) 406, 408, 412-413, 421, 427
ed 85
efficiency (see also optimization)
and backtracking 179-180
correctness 223-224
Perl 347-363
Perl-specific issues 347-363
PHP 478-480
regex objects 353-354
unlimited lookbehind 134
egrep
after-match data 138
backreference support 150
case-insensitive match 15
doubled-word solution 22
example use 14
flavor overview 92
flavor summary 32
history 86-87
introduced 6-8
metacharacter discussion 8-22
regex implementation 183
version covered 91
word boundaries 134
electric engine analogy 143-147
else (see conditional)
Emacs
after-match data 138
control characters 117
flavor overview 92
re-search-forward 101
search 100
strings as regexes 101
syntax class 128
version covered 91
word boundaries 134
email of author xxiii
email address example 70-73, 98
Java 98
.NET 99
embedded code
local 336
my 338-339
sanitizing 337
embedded string check optimization 247, 257
Embodiments of Mind 85
Empty 433
empty-element tag 481
encapsulation (see regex objects)
\p{Enclosing_Mark} 123
encoding (see also Unicode)
introduced 29
issues overview 105
UCS-2 107
UCS-4 107
UTF-16 107
END block 358
end method 377
end of line (see anchor, dollar)
end of previous match (see \G)
end of word (see word boundaries)
end-of-string anchor optimization 246
engine
analogy 143-147
implementation ease 183
introduced 27
testing type 146-147
with neverending match 227
type comparison 156-157, 180-183
English module 357
English vs. regex 275
enhanced line-anchor mode 112-113
introduced 69
ERE 87-88
ereg suite 439
errata xxiii
Escape 432
escape
introduced 22
term defined 27
essence
atomic grouping 170-171
greediness, laziness, and backtracking 168-169
NFA (see backtracking)
eval 319
example
atomic grouping 198, 201, 213, 271, 330, 340-341, 346
commafying a number 64-65
introduced 59
without lookbehind 67
CSV parsing
.NET 435
Perl 213-219
PHP 480
unrolling 271
VB.NET 219
dollar value 24-25, 51-52, 167-170, 175, 194-195
double-quoted string
allowing escaped quotes 196
egrep 24
final regex 264
makudonarudo 165, 169, 228-232, 264
sobering example 222-228
double-word finder 81
description 1
egrep 22
Emacs 101
Java 81
Java 98
.NET 99
five modifiers 316
floating-point number 194
form letter 50-51
gr[ea]y 9
hostname 22, 73, 76, 98-99, 137-138, 203, 260, 267-268, 304, 306, 450-451
egrep 25
Java 209
plucking from text 71-73, 206-208
in URL 74-77
validating 203-205
VB.NET 204
HREF 452
HTML 443-444, 459, 461, 464, 481, 484
conversion from text 67-77
encoding 414
<HR> 194
link 201-203
optional 140
paired tags 165
tag 9, 18-19, 26, 200-201, 326, 357
URL 74-77, 203, 206-208, 303, 450-451
URL-encoding 320
HTTP response 467
image tags 397
IP 5, 187-189, 267-268, 311, 314, 348-349
Jeffs 61-64
lookahead 61-64
mail processing 53-59
makudonarudo 165, 169, 228-232, 264
pathname 190-192
population 59
possessive quantifiers 198, 201
postal code 209-212
regex overloading 341-345
with alternation 175
with atomic grouping 170
with possessive quantifier 169
temperature conversion
Java 382
.NET 425
PHP 444
text-to-HTML 67-77
this|that 133, 139, 243, 245-247, 252, 255, 260-261
unrolling the loop 270-271, 477
URL 74-77, 201-204, 208, 260, 303-304, 306, 320, 450-451
egrep 25
Java 209
plucking 206-208
plucking from text 71-73
in URL 74-77
variable names 24
XML 481-484
ZIP code 209-212
exception
IllegalArgumentException 373, 380
IllegalStateException 376-377
IndexOutOfBoundsException 375-376, 380
IOException 81
PatternSyntaxException 371, 373
Explicit (Option) 415
ExplicitCapture (.NET) 408, 420, 427
exponential match 222-228, 330, 340
avoiding 264-266
discovery 226-228
explanation 226-228
non-determinism 264
short-circuiting 250
solving with atomic grouping 268
solving with possessive quantifiers 268
expose literal text 255
expression
context 294-295
contorting 294-295
Extended Regular Expressions 87-88
\f 115-116
introduced 44
Fahrenheit (see temperature conversion example)
failure
atomic grouping 171-172
forcing 241, 333, 335, 340-341
file globs 4
filename
patterns (globs) 4
prepending to line 79
Filo, David 397
\p{Final_Punctuation} 123
find method 375
region 384
FindAmpersand 358
Fite, Liz 33
five modifiers example 316
flags method 394
flavor
Perl 286-293
superficial chart
general 92
Java 367
.NET 407
PCRE 441
PHP 441
POSIX 88
term defined 27
flex version covered 91
floating regex cache (see regex objects)
floating ‘string’ 362
floating-point number example 194
forcing failure 241, 333, 335, 340-341
foreach vs. while vs. if 320
form letter example 50-51
\p{Format} 123
freeflowing regex 277-281
Friedl, Alfred 176
Friedl, brothers 33
birthday 11-12
Friedl, Jeffrey xxiii
fully qualified name 295
functions related to regexes in Perl 285
\G 130-133, 212, 315-316, 362, 447
(see also pos)
.NET 408
optimization 246
/g 61, 132, 307, 311-312, 315, 319
(see also \G)
introduced 51
with regex object 354
garbage collection Java benchmarking 236
gas engine analogy 143-147
general categories (see Unicode, properties)
gensub 182
George, Kit xxiv
GetGroupNames 427-428
GetGroupNumbers 427-428
gettimeofday 234
Gill, Stuart xxiv
global match (see /g)
global vs. private Perl variables 295
globs filename 4
GNU awk
after-match data 138
gensub 182
version covered 91
word boundaries 134
GNU egrep
after-match data 138
backreference support 150
doubled-word solution 22
-i bug 21
regex implementation 183
word boundaries 134
GNU Emacs (see Emacs)
GNU grep
shortest-leftmost match 182
version covered 91
GNU sed
after-match data 138
version covered 91
word boundaries 134
Gosling, James 89
GPOS 362
Greant, Zak xxiv
greatest weakness Perl 286
gr[ea]y example 9
greedy (see also lazy)
alternation 174-175
and backtracking 162-177
deference to an overall match 153, 274
favors match 167-168
first come, first served 153
global vs. local 182
introduced 151
localizing 225-226
quantifier 141
swapping 447
too greedy 152
green dragon 180
grep Perl 324
grep
as an acronym 85
flavor overview 92
history 86
regex flavor 86
version covered 91
-y option 86
group method 377
Group object (.NET) 418
Capture 437
creating 429
Index 430
Length 430
Success 430
ToString 430
using 430
Value 430
groupCount method 377
grouping and capturing 20-22
grouping-only parentheses (see non-capturing parentheses)
GroupNameFromNumber 427-428
GroupNumberFromName 427-428
Groups Match object method 429
\p{Gujarati} 122
Gutierrez, David xxiv
\p{Han} 122
hand tweaking
alternation 261
caveats 253
\p{Hangul_Jamo} 124
hasAnchoringBounds method 388
HASH(0x80f60ac) 257
hasTransparentBounds method 387
height attribute Java example 397
Hz 109
hex escape 117-118
Perl 286
highlighting with ANSI escape sequences 79
\p{Hiragana} 122
history
‘\+’ 87
AT&T Bell Labs 86
awk 87
Berkeley 86
ed trivia 86
egrep 86-87
grep 86
lex 87
PHP 440
of regexes 85-91
sed 87
underscore in \w 89
/x 90
hitEnd method 389-392
hostname example 22, 73, 76, 98-99, 137-138, 203, 260, 267-268, 304, 306, 450-451
egrep 25
Java 209
plucking from text 71-73, 206-208
in URL 74-77
validating 203-205
VB.NET 204
$HostnameRegex 76, 137, 303, 351
hot VM 236
HREF example 452
HTML
matching tag 200-201
HTML example 443-444, 459, 461, 464, 481, 484
conversion from text 67-77
encoding 414
<HR> 194
link 201-203
optional 140
paired tags 165
tag 9, 18-19, 26, 200-201, 326, 357
URL 74-77, 203, 206-208, 303, 450-451
URL-encoding 320
htmlspecialchars 461
HTTP newlines 115
HTTP response example 467
HTTP URL example 25, 74-77, 201-204, 206-209, 260, 303-304, 306, 320, 450-451
http://regex.info/ xxiii, 7, 345, 471
hybrid regex engine 182, 239, 243
hyphen in character class 9
Hz 109
(?i) (see: case-insensitive mode; mode modifier)
/i 135
(see also: case-insensitive mode; mode modifier)
introduced 47
with study 359
-i as -y 86
identifier matching 24
if (see conditional)
if vs. while vs. foreach 320
(?if then | else) (see conditional)
IgnoreCase (.NET) 96, 99, 408, 419, 427
IgnorePatternWhitespace (.NET) 99, 408, 419, 427
IllegalArgumentException 373, 380
IllegalStateException 376-377
image tags Java example 397
image tags example 397
implementation of engine 183
implicit 362
implicit anchor optimization 246
\p{InArrows} 124
\p{InBasic_Latin} 124
\p{InBox_Drawing} 124
\p{InCurrency} 124
\p{InCyrillic} 124
Index
Group object method 430
Match object method 429
IndexOutOfBoundsException 375-376, 380
\p{InDingbats} 124
indispensable TiVo 3
\p{InHangul_Jamo} 124
\p{InHebrew} 124
\p{Inherited} 122
initial class discrimination 245-248, 252, 257-259, 332, 361
\p{Initial_Punctuation} 123
\p{InKatakana} 124
inline modes (see modifiers)
\p{InTamil} 124
integrated handling 94
compile caching 243
interpolation 288-289
caching 351
introduced 77
mimicking 321
PHP 103
INTERSECTION class set operations 126
interval 141
introduced 20
⌈X{0,0}⌋ 141
\p{InTibetan} 124
introduced encoding 29
introduction Perl 37-38
IOException 81
IP example 5, 187-189, 267-268, 311, 314, 348-349
Iraq 11
Java 369
.NET 407
Perl 288
\p{IsCherokee} 122
\p{IsCommon} 122
\p{IsCyrillic} 122
\p{IsGujarati} 122
\p{IsHan} 122
\p{IsHebrew} 122
\p{IsHiragana} 122
isJavaIdentifierStart 369
\p{IsKatakana} 122
\p{IsLatin} 122
IsMatch (Regex object method) 421
ISO-8859-1 encoding 29, 87, 106, 108, 123
issues overview encoding 105
\p{IsThai} 122
\p{IsTibetan} 124