正则表达式简单练习

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115



import re


def match():
    """Demonstrate ``re.match`` with capture groups on a sample sentence.

    ``re.match`` only tries the pattern at the beginning of the string.
    Prints the full match followed by its two captured groups, or
    ``No Match!!`` when the pattern does not match.
    """
    # For comparison: re.match('www', 'www.runoob.com').span() is (0, 3),
    # while re.match('com', 'www.runoob.com') is None because 'com' is not
    # at the start of the string.
    sentence = 'Cats are smarter than dogs'
    found = re.match(r'(.*) are (.*?) .*', sentence, re.M | re.I)

    # Guard clause: report the failure first and leave early.
    if not found:
        print('No Match!!')
        return

    whole = found.group()
    first = found.group(1)
    second = found.group(2)
    print('matchObj.group() :', whole)
    print('matchObj.group(1):', first)
    print('matchObj.group(2):', second)


def search():
    """Demonstrate ``re.search``, which scans the whole string for a match.

    First prints the span of two literal patterns inside a host name, then
    prints the full match and the two captured groups for a sample
    sentence (or ``No Match!!`` when nothing matches).
    """
    host = 'www.runoob.com'
    # Unlike re.match, the pattern may be found anywhere in the string.
    for needle in ('www', 'com'):
        print(re.search(needle, host).span())

    sentence = 'Cats are smarter than dogs'
    found = re.search(r'(.*) are (.*?) .*', sentence, re.M | re.I)

    if not found:
        print('No Match!!')
        return

    print('searchObj.group() :', found.group())
    print('searchObj.group(1):', found.group(1))
    print('searchObj.group(2):', found.group(2))


def sub():
    """Demonstrate ``re.sub`` with a function as the replacement.

    When the replacement argument is callable, ``re.sub`` calls it once
    per match with the match object and substitutes whatever string the
    callable returns. Here every run of digits in the sample string is
    replaced by twice its integer value, and the result is printed.
    """
    # Simpler string-replacement examples, kept for reference:
    #   phone = '2004-959-959 # a foreign phone number'
    #   re.sub(r'#.*$', '', phone)   # strip the trailing comment
    #   re.sub(r'\D', '', phone)     # strip every non-digit character

    def double(matched):
        """Return the number captured in group 'value', doubled, as text."""
        value = int(matched.group('value'))
        return str(value * 2)

    s = 'A23G4HFD567'
    # (?P<value>...) names the capture group so that double() can fetch it
    # by name. The pattern is a raw string so that \d reaches the regex
    # engine verbatim instead of being an invalid string escape sequence.
    print(re.sub(r'(?P<value>\d+)', double, s))


def compile():
    """Demonstrate a compiled pattern and the match-object accessors.

    Compiles a case-insensitive "two words separated by a space" pattern,
    matches it against a sample sentence, and prints the match object,
    then the text and span of group 0 (whole match), group 1 and group 2,
    and finally the tuple of all captured groups.
    """
    # A compiled pattern's match() also accepts pos/endpos, e.g. with
    #   digits = re.compile(r'\d+'); text = 'one12twothree34four'
    #   digits.match(text)          # no match at the start
    #   digits.match(text, 2, 10)   # starts at 'e': no match
    #   digits.match(text, 3, 10)   # starts at '1': matches
    word_pair = re.compile(r'([a-z]+) ([a-z]+)', re.I)
    hit = word_pair.match('Hello World Wide Web')

    print(hit)
    # Group 0 is the whole match; groups 1 and 2 are the captured words.
    for index in range(3):
        print(hit.group(index))
        print(hit.span(index))
    print(hit.groups())
    # The pattern defines only two groups, so group 3 is not available.


def findall():
    """Demonstrate ``re.findall``, which returns every match in the string.

    ``match`` and ``search`` stop at the first hit; ``findall`` collects
    all of them. With two capture groups in the pattern, each result is a
    ``(name, number)`` tuple. The resulting list is printed.
    """
    # A compiled pattern's findall() additionally accepts pos/endpos to
    # restrict the portion of the string that is scanned.
    assignment = re.compile(r'(\w+)=(\d+)')
    pairs = assignment.findall('set width=20 and height=10')
    print(pairs)


def finditer():
    """Demonstrate ``re.finditer`` by printing each run of digits found.

    ``finditer`` yields match objects one at a time; the matched text of
    each is printed on its own line.
    """
    for number in re.finditer(r'\d+', '12a32bc43jf3'):
        digits = number.group()
        print(digits)


def split():
    """Demonstrate ``re.split`` in four variations and print each result.

    1. Split on runs of non-word characters.
    2. Same, but with the separator in a capture group, so the separators
       themselves are kept in the resulting list.
    3. Same as 1 on a string with a leading space, limited to one split.
    4. A pattern that never matches, which returns the input unsplit.
    """
    # Patterns are raw strings so that \W reaches the regex engine
    # verbatim instead of being an invalid string escape sequence.
    print(re.split(r'\W+', 'runoob, runoob, runoob.'))
    print(re.split(r'(\W+)', ' runoob, runoob, runoob.'))
    # maxsplit is passed by keyword: passing it positionally is deprecated.
    print(re.split(r'\W+', ' runoob, runoob, runoob.', maxsplit=1))
    print(re.split('a+', 'hello world'))


if __name__ == '__main__':
    # Script entry point: runs a single demo. To try another one,
    # uncomment its call below (and optionally comment out split()).
    # match()
    # search()
    # sub()
    # compile()
    # findall()
    # finditer()
    split()
参考资料

Python 正则表达式