[Python] itertools 모듈

Rudy·2022년 7월 1일

python 알고리즘

파이썬 표준 라이브러리

목록 보기

2/5

목표

사실 python docs를 참고하면 바로바로 알 수 있는 내용들이지만 이러한 함수의 존재조차 모르면 찾아보지도 못할 것 같아서 정리.

무한 iterator

별도의 매개변수나 처리가 없으면 무한히 원소를 꺼내쓸 수 있는 iterator

count([start, step])

#문서에서 제공하는 이해를 위한 대략적인 코드
def count(start=0, step=1):
    """Yield an endless arithmetic progression.

    The first value is *start*; every following value is the previous one
    with *step* added in place. The generator never finishes by itself.
    """
    # count(10) --> 10 11 12 13 14 ...
    # count(2.5, 0.5) -> 2.5 3.0 3.5 ...
    value = start
    yield value
    while True:
        value += step
        yield value

start 값 부터 step씩 증가하는 iterator. 정수가 아니어도 됨. 일부 iterable 객체에서 원하는 요소의 개수를 찾는 count함수와는 다름.

import itertools as it

# Called with no arguments, so the loop has no end condition of its own.
for i in it.count():
	print(i)
    
# Prints 0 1 2 3 4 5 6 7 ... and continues forever.

cycle(iterable)

#문서에서 제공하는 이해를 위한 대략적인 코드
def cycle(iterable):
    """Yield the elements of *iterable* over and over.

    Each element is passed through once while a private copy is recorded;
    afterwards the recorded copy is replayed without end. An empty input
    produces nothing.
    """
    # cycle('ABCD') --> A B C D A B C D A B C D ...
    seen = []
    remember = seen.append
    for item in iterable:
        yield item
        remember(item)
    if not seen:
        # Nothing was recorded, so there is nothing to replay.
        return
    while True:
        yield from seen

주어진 iterable 객체를 별도로 저장하고 순환하며 반환. 매개변수로 사용하는 iterable 객체에 따라 많은 메모리를 잡아먹을 수 있음.

import itertools as it

# The loop has no end condition of its own.
for i in it.cycle([1,2,3,4]):
	print(i)
    
# Prints 1 2 3 4 1 2 3 4 ... and continues forever.

repeat(object, [times])

#문서에서 제공하는 이해를 위한 대략적인 코드
def repeat(object, times=None):
    """Yield *object* again and again.

    With *times* given, the object is yielded ``range(times)`` times and the
    generator then finishes; with ``times=None`` it never finishes.
    """
    # repeat(10, 3) --> 10 10 10
    if times is not None:
        for _ in range(times):
            yield object
        return
    while True:
        yield object

주어진 객체를 무한히 반복 or 주어진 times 만큼 반복 반환. 일반적으로 다음과 같이 map, zip에 상수값을 반복하여 제공하는 형태로 사용.

>>> list(map(pow, range(10), repeat(2)))
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

가장 짧은 입력 시퀀스에서 종료되는 iterator

accumulate(iterable, [func, initial])

#문서에서 제공하는 이해를 위한 대략적인 코드
def accumulate(iterable, func=operator.add, *, initial=None):
    """Yield running results of folding *func* over *iterable*.

    When *initial* is ``None`` the first element seeds the running value and
    an empty input yields nothing. Otherwise *initial* is yielded first and
    the output has one more item than the input.
    """
    # accumulate([1,2,3,4,5]) --> 1 3 6 10 15
    # accumulate([1,2,3,4,5], initial=100) --> 100 101 103 106 110 115
    # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120
    stream = iter(iterable)
    if initial is None:
        # Take the first element as the seed; stop if there is none.
        for running in stream:
            break
        else:
            return
    else:
        running = initial
    yield running
    for item in stream:
        running = func(running, item)
        yield running

iterable 객체에 누적 합(외 여러 함수 적용 가능)을 구해주는 함수. 일반적으로 입력으로 준 객체의 길이와 같은 길이. initial 매개변수를 주면 해당 값부터 시작하고, 입력객체의 길이보다 하나 더 많은 길이.

>>> data = [3, 4, 6, 2, 1, 9, 0, 7, 5, 8]
>>> list(accumulate(data, max))
[3, 4, 6, 6, 6, 9, 9, 9, 9, 9]

chain(*iterables)

#문서에서 제공하는 이해를 위한 대략적인 코드
def chain(*iterables):
    """Yield every element of each argument in turn, as one flat stream."""
    # chain('ABC', 'DEF') --> A B C D E F
    for source in iterables:
        yield from source

chain.from_iterable(iterable)

#문서에서 제공하는 이해를 위한 대략적인 코드
def from_iterable(iterables):
    """Flatten one level: yield the elements of each item of *iterables*.

    Unlike ``chain(*iterables)``, the inputs arrive as a single iterable of
    iterables.
    """
    # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
    for source in iterables:
        yield from source

위의 두 함수 모두 여러개의 iterable 객체를 하나의 iterator로 처리. 전자는 매개변수로 연결할 iterable 객체를 다 넣어주면 되고, 후자는 하나의 iterable 객체로 묶어서 전달.

compress(data, selectors)

#문서에서 제공하는 이해를 위한 대략적인 코드
def compress(data, selectors):
    """Return the elements of *data* whose paired selector is truthy.

    The two inputs are paired with ``zip``, so pairing stops at the shorter
    one.
    """
    # compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F
    paired = zip(data, selectors)
    return (item for item, keep in paired if keep)

일종의 mask. selectors의 원소가 참(truthy)으로 평가되는 위치의 data 원소만 반환. 두 입력의 길이가 같을 필요는 없으며, 더 짧은 쪽이 끝나면 종료.

dropwhile(predicate, iterable)

#문서에서 제공하는 이해를 위한 대략적인 코드
def dropwhile(predicate, iterable):
    """Skip leading elements while *predicate* holds, then yield the rest.

    Once one element fails the predicate, it and every later element are
    yielded without being tested again.
    """
    # dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
    source = iter(iterable)
    for item in source:
        if predicate(item):
            continue
        yield item
        break
    # Everything after the first failing element passes through untested.
    yield from source

처음으로 predicate가 false가 되는 부분부터 마지막까지 반환.

filterfalse(predicate, iterable)

#문서에서 제공하는 이해를 위한 대략적인 코드
def filterfalse(predicate, iterable):
    """Yield the elements for which *predicate* is false.

    A *predicate* of ``None`` means ``bool``, so the falsy elements are
    yielded.
    """
    # filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
    rejects = bool if predicate is None else predicate
    for item in iterable:
        if rejects(item):
            continue
        yield item

predicate가 false가 되는 원소들을 반환.

groupby(iterable,[key])

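SQL의 GROUP BY와는 다르게 동작. 연속으로 나오는 같은 key의 원소들만 하나의 그룹으로 묶어 (key, 그룹 iterator) 형태로 반환하므로, 같은 key를 모두 한 그룹으로 모으려면 iterable이 동일한 key 함수로 미리 정렬되어 있어야 함. 호출하면 groupby 클래스 객체가 생성되며, 각 그룹은 이 객체와 내부 iterable을 공유하는 iterator라서 groupby 객체가 다음 그룹으로 넘어가면 이전 그룹은 더 이상 보이지 않게 됨. 나중에 필요하면 list(g)처럼 미리 저장해 둘 것.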

import itertools as it

test = 'asdfqwertasdfqwertfdgasddf'

# Sorted input: equal characters become adjacent, so each key forms one group.
for k,g in it.groupby(sorted(test)):
    print(k,list(g))
 
# Output (excerpt)
#a ['a', 'a', 'a']
#s ['s', 's', 's']
    
# Without sorting: the same key starts a new group at every separate run.
for k,g in it.groupby(test):
   print(k,list(g))

# Output (excerpt)
#a ['a']
#s ['s']
#a ['a']
#s ['s']
#a ['a']
#s ['s']

islice(iterable, *args)

#문서에서 제공하는 이해를 위한 대략적인 코드
def islice(iterable, *args):
    """Lazily yield the elements of *iterable* selected by slice-like bounds.

    The positional forms mirror ``slice()``: ``islice(it, stop)``,
    ``islice(it, start, stop)`` and ``islice(it, start, stop, step)``.
    ``None`` means 0 for start, "until the input is exhausted" for stop and
    1 for step. When stop is given, at most ``max(start, stop)`` elements
    are consumed from *iterable*.

    Raises:
        ValueError: if start or stop is negative or step is not positive.
            Because this is a generator, the error surfaces on the first
            ``next()`` call rather than at call time.
    """
    # islice('ABCDEFG', 2) --> A B
    # islice('ABCDEFG', 2, 4) --> C D
    # islice('ABCDEFG', 2, None) --> C D E F G
    # islice('ABCDEFG', 0, None, 2) --> A C E G
    # islice('ABCDEFG', 0) --> (nothing)
    s = slice(*args)
    # Test against None explicitly: 0 is a legitimate stop (select nothing)
    # and must not be mistaken for "no limit" the way ``x or default`` does.
    start = 0 if s.start is None else s.start
    stop = s.stop
    step = 1 if s.step is None else s.step
    if start < 0 or (stop is not None and stop < 0) or step <= 0:
        raise ValueError(
            "islice() start and stop must be None or >= 0, "
            "and step must be None or >= 1"
        )

    if stop is None:
        indexed = enumerate(iterable)
    else:
        # range comes first in zip so the input is never advanced past the
        # bound; max() still consumes up to *start* when start > stop.
        indexed = zip(range(max(start, stop)), iterable)

    wanted = start
    for i, element in indexed:
        if i == wanted:
            yield element
            wanted += step

iterable의 슬라이싱. 단 음수는 사용 불가.

pairwise(iterable)

#문서에서 제공하는 이해를 위한 대략적인 코드
def pairwise(iterable):
    """Return an iterator over overlapping pairs of consecutive elements.

    Inputs with fewer than two elements produce no pairs.
    """
    # pairwise('ABCDEFG') --> AB BC CD DE EF FG
    lagging, leading = tee(iterable)
    # Move the second copy one element ahead of the first.
    for _ in leading:
        break
    return zip(lagging, leading)

순서대로 2개씩 묶어서 반환. python 3.10에서 추가됨.

starmap(func, iterable)

#문서에서 제공하는 이해를 위한 대략적인 코드
def starmap(function, iterable):
    """Call *function* once per element, unpacking the element as arguments.

    Each element of *iterable* must itself be iterable; it is spread into
    positional arguments with ``*`` and the call's result is yielded.
    """
    # starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000
    for packed_args in iterable:
        result = function(*packed_args)
        yield result

이미 iterable 안의 요소들이 튜플 형태로 함수에 사용될 수 있도록 정리되어 있을 때 사용.

takewhile(predicate, iterable)

#문서에서 제공하는 이해를 위한 대략적인 코드
def takewhile(predicate, iterable):
    """Yield leading elements for as long as *predicate* holds.

    Iteration ends at the first element that fails the predicate; that
    element is consumed from the input but not yielded.
    """
    # takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4
    for item in iterable:
        if not predicate(item):
            return
        yield item

dropwhile와 반대로 predicate가 거짓이 나오기 직전까지 반환.

tee(iterable, [n=2])

#문서에서 제공하는 이해를 위한 대략적인 코드
def tee(iterable, n=2):
    """Return a tuple of *n* generators that each replay *iterable*.

    All generators draw from one shared underlying iterator. A value fetched
    on behalf of one generator is appended to every generator's queue, so
    the others see it later in the same order.
    """
    source = iter(iterable)
    queues = [collections.deque() for _ in range(n)]

    def replay(own_queue):
        while True:
            if len(own_queue) == 0:
                # This queue ran dry: pull one value from the shared source
                # and hand it to every queue, including this one.
                try:
                    fresh = next(source)
                except StopIteration:
                    return
                for queue in queues:
                    queue.append(fresh)
            yield own_queue.popleft()

    return tuple(replay(queue) for queue in queues)

하나의 iterable로부터 n개의 독립적인 iterator를 만들어 튜플로 반환(n개씩 잘라서 나누는 것이 아님). tee를 호출한 뒤 원본 iterable을 다른 곳에서 직접 사용하면 tee가 만든 iterator들이 그 원소를 놓치게 되어 오류 발생 가능. 스레드로 동시 사용시에도 오류 발생 가능.

zip_longest(*iterable,[fillvalue=None])

#문서에서 제공하는 이해를 위한 대략적인 코드
def zip_longest(*args, fillvalue=None):
    """Yield tuples of parallel elements until the longest input runs out.

    Inputs that finish early contribute *fillvalue* from then on. With no
    inputs at all, nothing is yielded.
    """
    # zip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
    sources = [iter(arg) for arg in args]
    remaining = len(sources)
    if remaining == 0:
        return
    while True:
        row = []
        for position, source in enumerate(sources):
            try:
                item = next(source)
            except StopIteration:
                remaining -= 1
                if remaining == 0:
                    # The last live input just ended: drop the partial row.
                    return
                # Swap in an endless filler so this slot never raises again.
                sources[position] = repeat(fillvalue)
                item = fillvalue
            row.append(item)
        yield tuple(row)

iterable들의 각각의 요소들을 합쳐서 튜플형태로 하나씩 반환. 길이가 맞지 않으면 fillvalue로 채워 넣는다. 이때 하나의 iterable이라도 무한할 수 있다면 호출 수를 제한하여 사용할 필요가 있다.

조합형 iterator

순열, 조합의 결과를 반환 해주는 iterator

product(*args,[repeat=1])

#문서에서 제공하는 이해를 위한 대략적인 코드
def product(*args, repeat=1):
    """Yield the Cartesian product of the inputs as tuples.

    The list of input pools is repeated *repeat* times before combining, so
    ``product(A, repeat=2)`` equals ``product(A, A)``. Every combination is
    built in memory before the first tuple is yielded.
    """
    # product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy
    # product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111
    pools = [tuple(pool) for pool in args] * repeat
    partials = [[]]
    for pool in pools:
        # Extend every partial combination with every element of this pool.
        partials = [prefix + [item] for prefix in partials for item in pool]
    yield from map(tuple, partials)

데카르트 곱을 튜플 형태로 반환. 자기 자신과의 곱을 위해 repeat 매개변수를 사용.

permutations(iterable,[r=None])

#문서에서 제공하는 이해를 위한 대략적인 코드
def permutations(iterable, r=None):
    """Yield the length-*r* orderings of the input elements as tuples.

    Elements are distinguished by position, not value. When *r* is ``None``
    the full input length is used. Candidates come from ``product`` over the
    index range, and those that reuse an index are discarded.
    """
    pool = tuple(iterable)
    size = len(pool)
    length = size if r is None else r
    for picks in product(range(size), repeat=length):
        if len(set(picks)) != length:
            # Some index appears more than once: not a permutation.
            continue
        yield tuple(pool[i] for i in picks)

r길이의 순열 반환. r이 주어지지 않으면 입력 iterable의 전체 길이. 반환되는 순서는 입력 iterable에 의존. 정렬되어 있다면 정렬된 순서로 반환됨.

combinations(iterable, r)

#문서에서 제공하는 이해를 위한 대략적인 코드
def combinations(iterable, r):
    """Yield the length-*r* combinations of the input elements as tuples.

    Elements are distinguished by position. Candidates come from
    ``permutations`` over the index range, and only those whose indices are
    already in sorted order are kept.
    """
    pool = tuple(iterable)
    size = len(pool)
    for picks in permutations(range(size), r):
        if list(picks) != sorted(picks):
            # Indices out of order: a reordering of a combination already kept.
            continue
        yield tuple(pool[i] for i in picks)

r길이의 중복을 포함하지 않는 조합 반환. 반환되는 순서는 입력 iterable에 의존. 정렬되어 있다면 정렬된 순서로 반환됨.

combinations_with_replacement(iterable, r)

#문서에서 제공하는 이해를 위한 대략적인 코드
def combinations_with_replacement(iterable, r):
    """Yield length-*r* combinations in which an element may be reused.

    Candidates come from ``product`` over the index range, and only those
    whose indices are in non-decreasing order are kept.
    """
    pool = tuple(iterable)
    size = len(pool)
    for picks in product(range(size), repeat=r):
        if list(picks) != sorted(picks):
            # Indices out of order: a reordering of a combination already kept.
            continue
        yield tuple(pool[i] for i in picks)

r길이의 중복을 포함하는 조합 반환. 반환되는 순서는 입력 iterable에 의존. 정렬되어 있다면 정렬된 순서로 반환됨.

Rudy

부족해도 너무 부족하다

이전 포스트

[Python] heapq 모듈

다음 포스트