정규표현식

Jane의 study note.·2022년 10월 1일

파이썬 Python

목록 보기

29/31

import re
# Without flags, "." matches any single character except a newline.
p = re.compile('a.b')
for text in ('a\nb', 'akb', 'a.b'):
    m = p.match(text)
    print(m)

# With re.DOTALL, "." matches a newline as well.
p = re.compile('a.b', flags=re.DOTALL)
for text in ('a\nb', 'akb'):
    m = p.match(text)
    print(m)

import re
# "a" followed by three or more literal dots: inside [] the dot is not a wildcard.
# Alternative to try: "a.{3,}" accepts any three or more non-newline characters.
p = re.compile("a[.]{3,}")
for candidate in ("acccb", "a....b", "aaab", "a.cccb"):
    print(p.match(candidate))

import re
# One or more lowercase ASCII letters.
p = re.compile("[a-z]+")
#             012345678
m = p.search("5 python")
first, stop = m.span()
print(first + stop)  # 2 + 8
# m.start(): index of the first character of the match
# m.end(): index just past the last character of the match

import re
data = """
park 010-1234-5678
kim 010-1111-2222"""
# Groups: (1) name, (2) first two phone-number fields, (3) last four digits.
# Raw strings keep \w, \s, \d and \g<n> from being read as (invalid) string
# escapes, which newer Python versions warn about.
p = re.compile(r'(\w+)\s(\d+[-]\d+)[-](\d+)')
# \g<n> in the replacement refers back to capture group n.
print(p.sub(r"\g<2>-**** \g<1>", data))
print('========================')
for i in data.split("\n"):  # three items: one empty string, then two records
    m = p.search(i)
    if m is not None:
        print(m.group(2) + "-****" + " " + m.group(1))
        
import re
phone = """ 
park 010-9999-9988 
kim 010-9909-7789 
lee 010-8789-7768
"""
# Group 1 captures the first two fields (3 digits, then 3-4 digits); the
# trailing four digits are matched but not captured, so they can be masked.
# Raw strings keep \d and \g<1> from being read as (invalid) string escapes.
p = re.compile(r'(\d{3}[-]\d{3,4})[-]\d{4}')
print(p.sub(r"\g<1>-####", phone))

import re
email = "park@naver.com,kim@daum.net,lee@myhome.co.kr"
# The positive lookahead (?=com$|net$) requires that the text right after a
# dot is "com" or "net" running to the end of the string.
p = re.compile(".*[@].*[.](?=com$|net$).*$")
for address in email.split(","):
    m = p.match(address)
    if m is None:
        continue
    print(m.group())

import re
# r'' is a raw string. The pattern is an underscore or ASCII letter first,
# followed by any number of word characters.
sample = '123 abc 123 def'
p = re.compile(r'[_a-zA-Z]\w*')
m = p.search(sample)   # first word that fits the pattern
print(m.group())
m = p.findall(sample)  # every word that fits, returned as a list
print(m)

sentence = 'The cat was hungry,He was scared because of the cat'
p = re.compile('the')
print(p.findall(sentence))
p = re.compile('the', re.IGNORECASE)  # re.I / re.IGNORECASE: ignore letter case
print(p.findall(sentence))

import re
# The parentheses group "ABC" so that + repeats the whole sequence.
p = re.compile('(ABC)+')
m = p.match('ABCABCABCAABB OK')
print(m, m.group(), sep='\n')

import re
# re.I (IGNORECASE) lets the lowercase class [a-z] match uppercase letters too.
p = re.compile('[a-z]', re.IGNORECASE)
for word in ('python', "Python", 'PYTHON'):
    m = p.match(word)
    print(m)

import re
digit = '[0-9]'
# A single digit at the start of the string.
match1 = re.match(digit, '1234')
print(match1.group())
# No digit at the start, so match() returns None.
match1 = re.match(digit, 'abc')
print(match1)
# "+" extends the match to the whole run of digits.
match1 = re.match(digit + '+', '1234')
print(match1)
print(match1.group())

import re
# "|" is alternation: either "Crow" or "Servo".
p = re.compile("Crow|Servo")
m = p.match('CrowHello')
print(m)
# "^" anchors the pattern at the start of the string.
for sentence in ('Life is too short', 'My Life'):
    p = re.search('^Life', sentence)
    print(p)

import re
# "$" anchors the match at the end of the string: does the text end with "short"?
print(re.search('short$', 'Life is too short, you need python'))
print(re.search('short$', 'Life is too short'))
# A bare "$" means end of string; "[$]" matches a literal dollar sign.
print(re.search('[$]', 'Life is too $ short'))
# r'\$' also matches a literal dollar sign. The raw string keeps the backslash
# from being read as an (invalid) string escape.
print(re.search(r'\$', 'Life is too $ short'))

import re
# \b is a zero-width word boundary (it does not consume a space), so p1 only
# matches "class" as a whole word; p2 matches it anywhere.
p1 = re.compile(r'\bclass\b')
p2 = re.compile(r'class')
for text in ('no class at all', 'the declassified algorithm'):
    for pattern in (p1, p2):
        print(pattern.search(text))

import re
# \B is the opposite of \b: it matches only where there is no word boundary,
# so p1 finds "class" only when it is embedded inside a longer word.
p1 = re.compile(r'\Bclass\B')
p2 = re.compile(r'class')
for text in ('no class at all', 'the declassified algorithm'):
    for pattern in (p1, p2):
        print(pattern.search(text))

import re
# Starts with "python", then one whitespace character, then one or more word
# characters. With re.M (MULTILINE), "^" matches at the start of every line.
# The raw string keeps \s and \w from being read as (invalid) string escapes.
p = re.compile(r'^python\s\w+', re.M)
data = """python one
life is too short
python two
you need python
python three"""
print(p.findall(data))

import re
# Starts with "python", then one whitespace character, then one or more word
# characters.
# For contrast: r'^python\s\w+' with re.M matches at the start of every line,
# whereas \A anchors only at the very beginning of the whole string. So even
# with re.M, only the first line can match here.
# The raw string keeps \A, \s and \w from being read as (invalid) string escapes.
p = re.compile(r'\Apython\s\w+', re.M)
data = """python one
life is too short
python two
you need python
python three"""
print(p.findall(data))

import re
st = "foo.bar,autoexec.bat,sendmail.cf,a.exe"
# Any run of non-newline characters, a dot, then an extension that is neither
# "bat" nor "exe" at the end of the string, rejected by the negative lookahead
# (?!bat$|exe$).
p = re.compile('.*[.](?!bat$|exe$).*$')
for filename in st.split(","):
    m = p.match(filename)
    if m is None:
        continue
    print(m.group())

import re
# sub() replaces every match; subn() does the same but returns a tuple of
# (new string, number of replacements).
p = re.compile("(blue|white|red)")
sentence = 'blue socks and red shoes and white hat'
m = p.sub('color', sentence)
print(m)

그룹핑?

# 괄호를 통해서 그룹핑을 할 수 있다.
# 그 후에 \g<그룹번호>를 통해 불러올 수 있다.


# Mask the second half of each resident registration number with asterisks.
import re  # module that provides regular-expression support
data = """ 
park 800905-1049118
kim  700905-1059119 
"""
# Six digits, a hyphen, then seven digits. Raw strings keep \d and \g<1> from
# being read as (invalid) string escapes.
pat = re.compile(r"(\d{6})-(\d{7})")
# \g<1> re-inserts the first group; the second group is replaced by asterisks.
print(pat.sub(r"\g<1>-*******", data))

Jane의 study note.

이전 포스트

DB연동

다음 포스트

정규표현식

파이썬 Python

그룹핑?

DB연동

Rest API

0개의 댓글