JavaScript 正则表达式

正则表达式用于字符串的模式匹配和操作。

正则基础

创建正则表达式

// 构造函数方式
const regex1 = new RegExp("abc", "gi");

// 字面量方式（推荐）
const regex2 = /abc/gi;

正则标志

标志	说明
g	全局匹配
i	不区分大小写
m	多行模式
s	dotAll 模式（. 匹配换行符）
u	Unicode 模式
y	粘性匹配

const str = "Hello hello HELLO";

str.match(/hello/);    // ["hello"] - 只匹配第一个
str.match(/hello/g);   // ["hello"] - 全局匹配（区分大小写，只有小写的 hello 匹配）
str.match(/hello/i);    // ["Hello"] - 不区分大小写
str.match(/hello/gi);  // ["Hello", "hello", "HELLO"] - 组合使用

字符类

基本字符类

// 点号 - 匹配任意字符（除换行符）
/a.c/.test("abc");   // true
/a.c/.test("aXc");   // true
/a.c/.test("a\nc");  // false

// 字符集 [...] - 匹配方括号内的任意字符
/[aeiou]/.test("hello");  // true
/[aeiou]/.test("xyz");     // false

// 否定字符集 [^...] - 匹配不在方括号内的字符
/[^aeiou]/.test("bcd");  // true
/[^aeiou]/.test("aei");  // false

预定义字符类

字符	说明
\d	数字 [0-9]
\D	非数字 [^0-9]
\w	单词字符 [a-zA-Z0-9_]
\W	非单词字符
\s	空白字符（空格、制表符、换行符）
\S	非空白字符
.	任意字符（除换行符）

/\d/.test("123");      // true
/\D/.test("abc");       // true
/\w/.test("hello_1");   // true
/\W/.test("!@#");       // true
/\s/.test(" ");         // true
/\S/.test("abc");       // true

范围字符类

/[a-z]/.test("hello");    // true
/[A-Z]/.test("HELLO");    // true
/[0-9]/.test("123");      // true
/[a-zA-Z]/.test("abc");   // true
/[a-zA-Z0-9]/.test("abc123"); // true

量词

基本量词

量词	说明
*	零个或多个（等价于 `{0,}`）
+	一个或多个（等价于 `{1,}`）
?	零个或一个（等价于 `{0,1}`）

/colou?r/.test("color");  // true - u 可选
/colou?r/.test("colour"); // true - u 可选
/go+gle/.test("gogle");   // true - o 至少一个
/go+gle/.test("google");  // true
/go*gle/.test("ggle");    // true - o 可出现零次或多次

数量量词

// {n} - 正好 n 个
/\d{3}/.test("123");    // true
/\d{3}/.test("12");     // false

// {n,} - 至少 n 个
/\d{2,}/.test("123");   // true

// {n,m} - n 到 m 个
/\d{2,4}/.test("123");  // true
/\d{2,4}/.test("1");    // false

贪婪与非贪婪

const str = "<div>内容1</div><div>内容2</div>";

// 贪婪匹配（默认）
str.match(/<div>.*<\/div>/);  // ["<div>内容1</div><div>内容2</div>"]

// 非贪婪匹配
str.match(/<div>.*?<\/div>/); // ["<div>内容1</div>"]
str.match(/<div>.+?<\/div>/); // ["<div>内容1</div>"]

位置锚点

锚点	说明
^	字符串开头
$	字符串结尾
\b	单词边界
\B	非单词边界

/^hello/.test("hello world");  // true - 开头匹配
/hello$/.test("world hello");   // true - 结尾匹配
/\bhello\b/.test("hello world"); // true - 完整单词
/\bhello\b/.test("helloworld");  // false - 不是完整单词

分组和引用

捕获分组

const date = "2024-03-15";
const regex = /(\d{4})-(\d{2})-(\d{2})/;
const match = date.match(regex);

console.log(match[0]);  // "2024-03-15" - 完整匹配
console.log(match[1]);  // "2024" - 第一个分组
console.log(match[2]);  // "03" - 第二个分组
console.log(match[3]);  // "15" - 第三个分组

// replace 中使用分组
"2024-03-15".replace(/(\d{4})-(\d{2})-(\d{2})/, "$3/$2/$1");  // "15/03/2024"

非捕获分组

// (?:...) 不创建分组引用
const str = "abc123def";
str.match(/(?:abc)(\d+)/);  // ["abc123", "123"] - 只有一个分组

命名分组

const date = "2024-03-15";
const regex = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;
const match = date.match(regex);

console.log(match.groups.year);   // "2024"
console.log(match.groups.month);  // "03"
console.log(match.groups.day);    // "15"

选择结构

/(cat|dog|bird)/.test("I have a cat");  // true
/(cat|dog|bird)/.test("I have a bird"); // true

// 嵌套选择
/(大|小)(红|白)猫/.test("大红猫");  // true

零宽断言

正向先行断言 `(?=)`

// 匹配后面紧跟特定内容的部分（断言中的内容不计入匹配结果）
const str = "fooBar Baz";
str.match(/foo(?=Bar)/);  // ["foo"] - foo 后面是 Bar

// 提取价格（不带货币符号）——注意：此例使用的是正向后行断言 (?<=)，要求数字前面是 $
const prices = "$100 €200 ¥50";
const numRegex = /(?<=\$)\d+/g;
prices.match(numRegex);  // ["100"]

负向先行断言 `(?!)`

// 匹配后面没有紧跟特定内容的部分（断言中的内容不计入匹配结果）
const str = "fooBar fooBaz";
str.match(/foo(?!Bar)/);  // ["foo"] - foo 后面不是 Bar

// Keep only the file names that do NOT end in ".js".
const files = ["test.js", "readme.txt", "app.js"];
// The regex has to be applied with .test(f): a bare regex literal is an
// object and therefore always truthy, so filter() would keep every element.
const nonJsFiles = files.filter(f => /^(?!.*\.js$).*$/.test(f));  // ["readme.txt"]

正向后行断言 `(?<=)`

// 匹配前面紧邻特定内容的部分（断言中的内容不计入匹配结果）
const str = "$100 ¥200 €300";
str.match(/(?<=\$)\d+/);  // ["100"]

// 提取括号内的内容
"hello(world)".match(/(?<=\()[^)]+/);  // ["world"]

负向后行断言 `(?<!)`

// 匹配前面没有紧邻特定内容的部分（断言中的内容不计入匹配结果）
const str = "fooBar fooBaz";
str.match(/(?<!Bar)foo/);  // ["foo"] - foo 前面不是 Bar

RegExp 方法

test()

const regex = /\d{3}-\d{4}/;
regex.test("123-4567");  // true
regex.test("abc-defg");  // false

exec()

const regex = /(\d{4})-(\d{2})-(\d{2})/g;
const str = "2024-03-15 和 2024-04-20";

let match;
while ((match = regex.exec(str)) !== null) {
    console.log(`完整匹配: ${match[0]}`);
    console.log(`年: ${match[1]}, 月: ${match[2]}, 日: ${match[3]}`);
}

String 方法

match()

const str = "2024-03-15";

// 非全局匹配
str.match(/(\d{4})-(\d{2})-(\d{2})/);
// ["2024-03-15", "2024", "03", "15", index: 0, input: "2024-03-15"]

// 全局匹配
str.match(/\d+/g);  // ["2024", "03", "15"]

matchAll()

const str = "2024-03-15 和 2024-04-20";
const regex = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/g;

for (const match of str.matchAll(regex)) {
    console.log(`${match.groups.year}年${match.groups.month}月${match.groups.day}日`);
}

replace()

const str = "Hello World";

// 简单替换
str.replace("World", "JavaScript");  // "Hello JavaScript"

// 正则替换
str.replace(/(\w+)\s(\w+)/, "$2 $1");  // "World Hello"

// 函数替换
str.replace(/\w+/g, (word) => word.toUpperCase());  // "HELLO WORLD"

split()

const str = "苹果,香蕉;橙子|葡萄";

// 使用字符串分割
str.split(",");  // ["苹果", "香蕉;橙子|葡萄"]

// 使用正则分割
str.split(/[,;|]/);  // ["苹果", "香蕉", "橙子", "葡萄"]

search()

const str = "Hello World";

// 返回第一个匹配的位置
str.search(/World/);  // 6
str.search(/Java/);    // -1（未找到）

常见正则表达式

验证类

// 手机号（中国大陆）
/^1[3-9]\d{9}$/

// 邮箱
/^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/

// URL
/^https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/

// 身份证号（中国）
/^[1-9]\d{5}(18|19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]$/

// 密码强度（至少8位，包含大小写字母和数字）
/^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[a-zA-Z\d]{8,}$/

提取类

// 提取 HTML 标签内容
/<(\w+)[^>]*>([^<]+)<\/\1>/g

// 提取 URL 参数
/[?&](\w+)=([^&]+)/g

// 提取日期
/\d{4}[-/]\d{2}[-/]\d{2}/g

// 提取 IP 地址
/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/g

替换类

// 将连续的空白字符合并为单个空格
str.replace(/\s+/g, " ");

// 驼峰转连字符
"helloWorld".replace(/([a-z])([A-Z])/g, "$1-$2").toLowerCase();  // "hello-world"

// 敏感信息脱敏
"13812345678".replace(/(\d{3})\d{4}(\d{4})/, "$1****$2");  // "138****5678"

实用工具函数

/**
 * Checks whether a value looks like a mainland-China mobile number.
 *
 * The whole input must be exactly 11 digits: a leading 1, a second digit
 * in the range 3-9, followed by nine more digits.
 *
 * @param {string} phone - Candidate phone number.
 * @returns {boolean} true when the entire input matches the pattern.
 */
function isPhone(phone) {
    const mobilePattern = /^1[3-9]\d{9}$/;
    return mobilePattern.exec(phone) !== null;
}

/**
 * Checks whether a value has the basic shape of an e-mail address.
 *
 * Accepted shape: a local part of letters, digits and . _ % + -, an "@",
 * a domain of letters, digits, dots and hyphens, then a final dot followed
 * by at least two letters.
 *
 * @param {string} email - Candidate e-mail address.
 * @returns {boolean} true when the entire input matches the pattern.
 */
function isEmail(email) {
    const emailPattern = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
    return emailPattern.exec(email) !== null;
}

/**
 * Extracts `key=value` query parameters from a URL string.
 *
 * Each pair introduced by `?` or `&` is URI-decoded and stored on a plain
 * object; when a key repeats, the last occurrence wins. Pairs without an
 * `=` are ignored.
 *
 * @param {string} url - URL (or bare query string) to scan.
 * @returns {Object<string, string>} Map of decoded parameter names to values.
 * @throws {URIError} If a key or value contains a malformed percent-escape.
 */
function getUrlParams(url) {
    const params = {};
    // `#` is excluded from keys and values so that a trailing fragment
    // ("?x=1#top") does not leak into the last parameter's value.
    const pairPattern = /[?&]+([^=&#]+)=([^&#]*)/g;
    // matchAll iterates the pairs directly instead of abusing replace()
    // for its callback side effects.
    for (const [, key, value] of url.matchAll(pairPattern)) {
        params[decodeURIComponent(key)] = decodeURIComponent(value);
    }
    return params;
}

/**
 * Replaces every occurrence of the given sensitive words with `***`.
 *
 * Words are matched literally (regex metacharacters are escaped) and
 * case-sensitively.
 *
 * @param {string} text - Text to filter.
 * @param {string[]} words - Sensitive words to mask.
 * @returns {string} The text with each matched word replaced by `***`;
 *   the text is returned unchanged when there is nothing to match.
 */
function filterSensitiveWords(text, words) {
    const escapedWords = words
        // An empty word would turn into an empty alternative that matches
        // at every position in the text.
        .filter((w) => w !== '')
        // Longest first: alternation takes the first alternative that
        // matches, so "ab" listed before "abc" would leave a stray "c".
        .sort((a, b) => b.length - a.length)
        .map((w) => w.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

    // With no usable words the pattern would be empty and '***' would be
    // inserted between every character, so return the text untouched.
    if (escapedWords.length === 0) {
        return text;
    }

    const pattern = new RegExp(escapedWords.join('|'), 'g');
    return text.replace(pattern, '***');
}

小结

正则表达式用于字符串的模式匹配
字符类、量词、锚点是正则的基础元素
分组用于捕获子匹配
零宽断言用于位置匹配
String 和 RegExp 对象都提供了正则相关方法

练习

验证手机号、邮箱、身份证号
实现一个简单的路由参数提取函数
实现一个敏感词过滤功能
解析 URL 中的查询参数
实现驼峰命名和下划线命名的互转

正则基础​

创建正则表达式​

正则标志​

字符类​

基本字符类​

预定义字符类​

范围字符类​

量词​

基本量词​

数量量词​

贪婪与非贪婪​

位置锚点​

分组和引用​

捕获分组​

非捕获分组​

命名分组​

选择结构​

零宽断言​

正向先行断言 (?=)​

负向先行断言 (?!)​

正向后行断言 (?<=)​

负向后行断言 (?<!)​

RegExp 方法​

test()​

exec()​

String 方法​

match()​

matchAll()​

replace()​

split()​

search()​

常见正则表达式​

验证类​

提取类​

替换类​

实用工具函数​

小结​

练习​

正则基础

创建正则表达式

正则标志

字符类

基本字符类

预定义字符类

范围字符类

量词

基本量词

数量量词

贪婪与非贪婪

位置锚点

分组和引用

捕获分组

非捕获分组

命名分组

选择结构

零宽断言

正向先行断言 `(?=)`

负向先行断言 `(?!)`

正向后行断言 `(?<=)`

负向后行断言 `(?<!)`

RegExp 方法

test()

exec()

String 方法

match()

matchAll()

replace()

split()

search()

常见正则表达式

验证类

提取类

替换类

实用工具函数

小结

练习