GIN index 테스트

허진수·2023년 1월 22일
0

테스트 절차

모델 및 인덱스 생성

각각의 인덱스로 조회 및 시간 측정

모델 row 수를 100000, 1000000, 2000000, 3000000, …, 10000000으로 측정

class Test(Model):
    """Benchmark table used to compare two index types on equivalent lookups.

    ``search_vector`` is covered by a ``GinIndex`` and ``username`` by a plain
    ``Index``; the remaining columns carry no index declared here.
    """

    # Column covered by the GIN index.
    search_vector = SearchVectorField(null=True, blank=True)
    # Free-text column; no index is declared for it in Meta.
    search_text = TextField(null=True, blank=True)
    # Column covered by the plain Index.
    username = CharField(max_length=150, null=True, blank=True)
    human_name = CharField(max_length=100, null=True, blank=True)

    class Meta:
        app_label = 'test'
        indexes = [
            GinIndex(fields=['search_vector']),
            Index(fields=['username']),
        ]

def test_index(data_cnt):
    """Grow the ``Test`` table towards ``data_cnt`` rows and time one lookup per index.

    Rows are inserted in groups of ten that share one freshly generated UUID
    as ``username``. After a 30 second pause, the last generated UUID is
    looked up once through ``search_vector`` and once through ``username``,
    and the elapsed times, their difference and their ratio are printed.

    Args:
        data_cnt: Target total number of rows. Only whole groups of ten are
            inserted, so up to nine rows may remain missing.

    Returns:
        None. Results are written to stdout. If no rows were inserted there
        is no lookup key, so the function prints a notice and returns early.
    """
    import time
    from uuid import uuid4

    # Same import the sibling test_index_cnt uses for the model under test.
    from .models import Test

    missing = data_cnt - Test.objects.all().count()
    name = ''
    for _ in range(missing // 10):
        name = str(uuid4())
        for __ in range(10):
            # NOTE(review): only username is set here; the search_vector
            # lookup below presumably relies on that column being populated
            # elsewhere (e.g. a database trigger) - confirm.
            Test(username=name).save()

    if not name:
        # Fewer than ten rows were missing, so nothing was inserted and
        # there is no key to search for.
        print('no rows inserted; skipping lookup timing')
        return

    time.sleep(30)

    # perf_counter is the clock intended for measuring short intervals.
    # Indexing with [0] is kept on purpose: .first() would add an ORDER BY
    # and change the query being measured.
    begin = time.perf_counter()
    print(Test.objects.filter(search_vector=name)[0].username)
    delta = time.perf_counter() - begin
    print(f'search vector: {delta} s')

    begin = time.perf_counter()
    print(Test.objects.filter(username=name)[0].username)
    delta2 = time.perf_counter() - begin
    print(f'normal index: {delta2} s')

    print(f'diff: {delta - delta2}')

    # Guard against a zero reading on clocks with coarse resolution.
    rate = delta / delta2 if delta2 else float('inf')
    print(f'rate: {rate}')

def test_index_cnt(cnt, name='113e8729-db91-4e70-a1b9-fca19c7292cf'):
    """Repeat the two indexed lookups and print the elapsed time of each round.

    Args:
        cnt: Either an iterable of round labels (each label is printed as
            the round header) or an int, in which case rounds are numbered
            ``1..cnt``.
        name: Value looked up through ``search_vector`` and ``username``.
            Defaults to the UUID the benchmark was originally run with.

    Returns:
        None. Timings are written to stdout.
    """
    import time
    from .models import Test

    # Accept a plain round count as well as an iterable of labels.
    rounds = range(1, cnt + 1) if isinstance(cnt, int) else cnt

    for idx in rounds:
        print(f'{idx} 회차')
        print('----------------')

        # perf_counter is the clock intended for measuring short intervals.
        # Indexing with [0] is kept on purpose: .first() would add an
        # ORDER BY and change the query being measured.
        begin = time.perf_counter()
        print(Test.objects.filter(search_vector=name)[0].username)
        delta = time.perf_counter() - begin
        print(f'search vector: {delta} s')

        begin = time.perf_counter()
        print(Test.objects.filter(username=name)[0].username)
        delta2 = time.perf_counter() - begin
        print(f'normal index: {delta2} s')

테스트 결과

row 수 별 검색 소요 시간

| row 수 | GIN Index(search vector) | Normal Index(index range scan) |
| --- | --- | --- |
| 100000 | 0.012472152709960938 | 0.003549337387084961 |
| 1000000 | 0.00644993782043457 | 0.0003342628479003906 |
| 2000000 | 0.0017161369323730469 | 0.00032210350036621094 |
| 3000000 | 0.00834202766418457 | 0.0003218650817871094 |
| 4000000 | 0.018002033233642578 | 0.016844987869262695 |
| 5000000 | 0.007339000701904297 | 0.00039124488830566406 |
| 6000000 | 0.011085271835327148 | 0.0004980564117431641 |
| 7000000 | 0.0034601688385009766 | 0.00038313865661621094 |
| 8000000 | 0.0024018287658691406 | 0.00039887428283691406 |
| 9000000 | 0.03994297981262207 | 0.0012121200561523438 |
| 10000000 | 0.0406489372253418 | 0.004667043685913086 |

10000000건 반복 조회 테스트

| 회차 | GIN Index(search vector) | Normal Index(index range scan) |
| --- | --- | --- |
| 1회차 | 0.0385589599609375 | 0.0006618499755859375 |
| 2회차 | 0.008337974548339844 | 0.0004711151123046875 |
| 3회차 | 0.007966041564941406 | 0.0004279613494873047 |
| 4회차 | 0.00814199447631836 | 0.00046706199645996094 |
| 5회차 | 0.007985115051269531 | 0.00046706199645996094 |
| 6회차 | 0.007925033569335938 | 0.00042819976806640625 |
| 7회차 | 0.007919788360595703 | 0.0004220008850097656 |
| 8회차 | 0.008038997650146484 | 0.0004839897155761719 |
| 9회차 | 0.00966191291809082 | 0.0004928112030029297 |
| 10회차 | 0.008493900299072266 | 0.0004630088806152344 |
| 11회차 | 0.007897138595581055 | 0.00041294097900390625 |
| 12회차 | 0.008209228515625 | 0.0004851818084716797 |
| 13회차 | 0.007968902587890625 | 0.0004379749298095703 |
| 14회차 | 0.007870912551879883 | 0.0004029273986816406 |
| 15회차 | 0.007960081100463867 | 0.00041604042053222656 |
| 16회차 | 0.007874011993408203 | 0.0004038810729980469 |
| 17회차 | 0.008240699768066406 | 0.0004930496215820312 |
| 18회차 | 0.00803518295288086 | 0.00043773651123046875 |
| 19회차 | 0.007933855056762695 | 0.00045680999755859375 |
| 20회차 | 0.008169889450073242 | 0.0004410743713378906 |
| 21회차 | 0.00801396369934082 | 0.0004439353942871094 |
| 22회차 | 0.00800776481628418 | 0.0004322528839111328 |
| 23회차 | 0.007935285568237305 | 0.0004169940948486328 |
| 24회차 | 0.007859945297241211 | 0.00041222572326660156 |
| 25회차 | 0.007918119430541992 | 0.0004439353942871094 |
| 26회차 | 0.008023977279663086 | 0.0005030632019042969 |
| 27회차 | 0.008026123046875 | 0.0005171298980712891 |
| 28회차 | 0.008137941360473633 | 0.0005068778991699219 |
| 29회차 | 0.007944107055664062 | 0.0004367828369140625 |
| 30회차 | 0.007898092269897461 | 0.0004248619079589844 |
profile
안녕하세요

0개의 댓글