1 re.search()
(匹配,字符串内容)
group() 获取最终匹配结果以字符串输出
groups() 以元组形式返回所有分组的匹配结果;前提是正则表达式中用 () 定义了分组,否则返回空元组
groupdict() 以字典 {键: 值} 形式返回命名分组的匹配结果;需要在分组括号内的开头加 ?P<键名>,即写成 (?P<键名>...);未命名的分组不会出现在字典中
import re
# Search the log line for three groups: a named dotted-quad group
# (ip_address), an unnamed filler group, and a named group (content)
# holding the bracketed text.
log_line = "10.120.1.1 - - [test is very nice 0800]"
result = re.search(
    r"(?P<ip_address>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(.*)(?P<content>\[.*\])",
    log_line,
)
# Print four views of the same match, in order: the raw Match object
# (noisy repr), the whole matched text, the tuple of all groups, and the
# dict of named groups only.
for view in (result, result.group(), result.groups(), result.groupdict()):
    print(view)
<re.Match object; span=(0, 39), match='10.120.1.1 - - [test is very nice 0800]'>
10.120.1.1 - - [test is very nice 0800]
('10.120.1.1', ' - - ', '[test is very nice 0800]')
{'ip_address': '10.120.1.1', 'content': '[test is very nice 0800]'}
Process finished with exit code 0
2 re.match()
(正则匹配,字符串)
只从字符串开头开始匹配,不会搜索整个字符串;开头不匹配就返回 None。用得不多
import re
# re.match only tries the pattern at the start of the subject string.
subject = "python regex demo"
start_pattern = re.compile(r"p..h")
result = start_pattern.match(subject)
print(result)
<re.Match object; span=(0, 4), match='pyth'>
Process finished with exit code 0
3 re.findall()
(正则匹配,字符串)
以列表方式返回字符串中所有符合正则的内容
import re
# Collect every non-overlapping "d..o" occurrence in the sentence as a list.
sentence = "python is very demo and that is demo"
four_char_word = re.compile(r"d..o")
result = four_char_word.findall(sentence)
print(result)
['demo', 'demo']
Process finished with exit code 0
4 re.split()
(正则匹配,字符串)
正则匹配到的字符 作为 分隔符 分割字符串
import re
# Sample line whose columns are separated by runs of spaces. The gap is
# built explicitly so the multi-space separators cannot be collapsed by
# copy/paste; with single spaces the pattern below would never match and
# the split would return the whole line as one element.
gap = " " * 4
data = "root" + gap + "pts/0" + gap + "(2023-03-03) 21:00"
# Split only on runs of two or more whitespace characters, so the single
# space inside "(2023-03-03) 21:00" is kept.
result = re.split(r"\s{2,}", data)
print(result)
['root', 'pts/0', '(2023-03-03) 21:00']
Process finished with exit code 0
5 re.sub()
(正则匹配,替换后的内容,字符串)
把字符串中被正则匹配到的内容
替换成指定的替换后内容
import re
url_list = ["http://1.1.1.1/logo.jpg","http://2.2.2.2/index.php","http://3.3.3.3/index.html"]
ip_address = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
# Take the URLs out of the list one at a time: each element is a str,
# which is what re.sub needs as the subject of the replacement.
for url in url_list:
    # Swap the dotted-quad host for the domain name and show the result.
    test = re.sub(ip_address, "www.wuyupeng.com", url)
    print(test)
http://www.wuyupeng.com/logo.jpg
http://www.wuyupeng.com/index.php
http://www.wuyupeng.com/index.html
Process finished with exit code 0
6.re.compile()
将正则表达式编译成对象,通过正则对象调用相应的操作方法完成数据处理
在循环中反复使用同一个正则时,先编译再调用可以省去每次调用时查找/解析模式的开销:速度快、效率高,处理时间比直接调用 re.search() 等函数更短
import re
import time
filename = r"E:\projecttest\files\access.log"
content = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
# Compile the pattern once into a regex object; the loop then calls the
# object's own search() instead of re.search(content, line) every time.
change_content = re.compile(content)


def count_ips(lines, pattern=change_content):
    """Count the first pattern match found on each line.

    Args:
        lines: Any iterable of strings (an open text file works).
        pattern: Compiled regex used to find the address on each line.

    Returns:
        A dict mapping each matched string to the number of lines it was
        found on, in order of first appearance.
    """
    zd = {}
    for line in lines:
        ip_addr = pattern.search(line)
        if ip_addr is None:
            # Lines without a match (blank or malformed) are skipped
            # rather than crashing on None.group().
            continue
        ip = ip_addr.group()
        zd[ip] = zd.get(ip, 0) + 1
    return zd


def main():
    """Count addresses per line in the log file and print counts and elapsed time."""
    start_time = time.time()
    # The with-block closes the log file even if counting fails.
    with open(filename, mode="r") as data:
        zd = count_ips(data)
    print(zd)
    for ip, hits in zd.items():
        print("IP%s出现的次数为%s" % (ip, hits))
    stop_time = time.time()
    print(stop_time - start_time)


if __name__ == "__main__":
    main()
{'124.128.58.67': 38268, '175.6.147.168': 5217, '121.5.79.247': 1739, '194.36.80.225': 1739}
IP124.128.58.67出现的次数为38268
IP175.6.147.168出现的次数为5217
IP121.5.79.247出现的次数为1739
IP194.36.80.225出现的次数为1739
0.0747983455657959
Process finished with exit code 0
Comments NOTHING