Memory leak when doing https request #1029

Closed

gjj2828 opened this issue Aug 1, 2016 · 11 comments

gjj2828 commented Aug 1, 2016

I experienced memory leak issues and narrowed them down to HTTPS requests.

Here is the test code:

#!/usr/bin/env python3

import aiohttp
import asyncio
import random

async def Test(url):
    # Stagger task start-up by up to 60 seconds.
    await asyncio.sleep(random.randint(0, 60))
    with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            await resp.text()
        # Keep the session (and its connections) alive for an hour.
        await asyncio.sleep(3600)

if __name__ == '__main__':
    num = 2000
    url = 'https://www.paypal.com'
    # url = 'http://aiohttp.readthedocs.io'
    tasks = [Test(url) for i in range(num)]
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.wait(tasks))

When I make HTTPS requests, memory usage climbs to 2.2 GB after 60 seconds. With plain HTTP requests, it only reaches about 60 MB after 60 seconds.

asvetlov (Member) commented Aug 1, 2016

Don't recreate a ClientSession for every request -- it's pretty expensive.

Use a single session for the whole program.
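
As an illustration, a minimal single-session rewrite of the reproduction script might look like this (a sketch only, not from the original report; it uses asyncio.gather and the async-with form of ClientSession, which newer aiohttp prefers, whereas the scripts in this thread use the plain with form accepted at the time):

#!/usr/bin/env python3

import aiohttp
import asyncio
import random


async def fetch(session, url):
    # Reuse the shared session; only the individual requests are staggered.
    await asyncio.sleep(random.randint(0, 60))
    async with session.get(url) as resp:
        await resp.text()


async def main(url, num):
    # One ClientSession (and one connection pool) for the whole program.
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*[fetch(session, url) for _ in range(num)])


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main('https://www.paypal.com', 2000))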

mpaolini (Contributor) commented Aug 1, 2016

Yeah, it looks like some kind of leak is there even with a single session. Here's the updated script:

#!/usr/bin/env python3

import aiohttp
import asyncio
import gc
import random


async def test(url, session, sleep=10):
    await asyncio.sleep(random.randint(0, sleep))
    async with session.get(url) as resp:
        await resp.text()
    await asyncio.sleep(sleep)


async def test_with_session(url, sleep):
    with aiohttp.ClientSession() as session:
        await test(url, session, sleep)


async def main_multisession(url, concurrency, sleep):
    tasks = [test_with_session(url, sleep) for i in range(concurrency)]
    await asyncio.wait(tasks)


async def main_singlesession(url, concurrency, sleep):
    with aiohttp.ClientSession() as session:
        tasks = [test(url, session, sleep) for i in range(concurrency)]
        await asyncio.wait(tasks)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--concurrency', type=int, default=10)
    parser.add_argument('--loops', type=int, default=10)
    parser.add_argument('--url', required=True)
    parser.add_argument('--sleep', type=int, default=1)
    parser.add_argument('--single-session', action='store_true')
    args = parser.parse_args()
    gc.disable()  # with automatic collection off, gc.get_count() keeps growing as cyclic garbage accumulates
    loop = asyncio.get_event_loop()
    print('calling url {}'.format(args.url))
    fun = main_singlesession if args.single_session else main_multisession
    for i in range(args.loops):
        loop.run_until_complete(fun(args.url, args.concurrency, args.sleep))
        print(gc.get_count())

And here are the results:

(virtual) marco@augusto:~/src/aiohttp$ python test_leak.py --single --url https://google.com
calling url https://google.com
(3197, 10, 4)
(4699, 10, 4)
(5725, 10, 4)
(6926, 10, 4)
(7834, 10, 4)
(8880, 10, 4)
(10077, 10, 4)
(11435, 10, 4)
(13090, 10, 4)
(14411, 10, 4)
(virtual) marco@augusto:~/src/aiohttp$ python test_leak.py --url https://google.com
calling url https://google.com
(4307, 10, 4)
(6273, 10, 4)
(8236, 10, 4)
(10287, 10, 4)
(12296, 10, 4)
(13998, 10, 4)
(16281, 10, 4)
(18304, 10, 4)
(20250, 10, 4)
(22306, 10, 4)

mpaolini (Contributor) commented Aug 1, 2016

Updated leak-detection script:

#!/usr/bin/env python3

import aiohttp
import asyncio
import gc
import random


async def test(url, session, sleep=10):
    await asyncio.sleep(random.randint(0, sleep))
    async with session.get(url) as resp:
        await resp.text()
    await asyncio.sleep(sleep)


async def test_with_session(url, sleep):
    with aiohttp.ClientSession() as session:
        await test(url, session, sleep)


async def do_multi(url, concurrency, sleep):
    tasks = [test_with_session(url, sleep) for i in range(concurrency)]
    await asyncio.wait(tasks)


async def wait_and_cancel(tasks):
    tasks = [asyncio.ensure_future(task) for task in tasks]
    try:
        done, pending = await asyncio.wait(tasks)
    except asyncio.CancelledError:
        for task in tasks:
            task.cancel()


async def do_single(url, concurrency, sleep):
    with aiohttp.ClientSession() as session:
        tasks = [test(url, session, sleep) for i in range(concurrency)]
        await wait_and_cancel(tasks)


async def do_only_session(url, concurrency, sleep):
    with aiohttp.ClientSession():
        tasks = [asyncio.sleep(sleep) for i in range(concurrency)]
        await wait_and_cancel(tasks)

async def do_only_get(url, concurrency, sleep):
    tasks = [aiohttp.get(url) for i in range(concurrency)]
    await wait_and_cancel(tasks)


async def do_requests_request(url, session):
    resp = session.get(url)
    resp.content


async def do_requests(url, concurrency, sleep):
    import requests
    with requests.Session() as session:
        tasks = [do_requests_request(url, session) for i in range(concurrency)]
        await wait_and_cancel(tasks)

async def do_empty(url, concurrency, sleep):
    tasks = [asyncio.sleep(sleep) for i in range(concurrency)]
    await wait_and_cancel(tasks)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--concurrency', type=int, default=10)
    parser.add_argument('--loops', type=int, default=10)
    parser.add_argument('--url', required=True)
    parser.add_argument('--sleep', type=int, default=1)
    parser.add_argument(
        '--target',
        choices=[
            fun_name[3:] for fun_name in globals().keys()
            if fun_name.startswith('do_')
        ],
        required=True)
    parser.add_argument('--single-session', action='store_true')
    args = parser.parse_args()
    gc.disable()
    loop = asyncio.get_event_loop()
    print('calling url {}'.format(args.url))
    fun = globals()['do_{}'.format(args.target)]
    prev_count = gc.get_count()
    leaks = []
    for i in range(args.loops):
        task = asyncio.ensure_future(
            fun(args.url, args.concurrency, args.sleep)
        )
        try:
            loop.run_until_complete(task)
        except KeyboardInterrupt:
            task.cancel()
            loop.run_forever()
            task.exception()
            raise
        next_count = gc.get_count()
        if prev_count and prev_count < next_count:
            print('collectable found at iteration {i}: {next_count}'
                  .format_map(locals()))
            leaks.append((i, next_count))
        prev_count = next_count
    print('found {} leaks in iterations {!r} (total iterations: {})'.format(
        len(leaks),
        [leak[0] for leak in leaks],
        args.loops))

As you can see, vanilla asyncio is circular-reference free after startup:

(virtual) marco@augusto:~/src/aiohttp$ python test_leak.py --url https://google.com --target empty --loops 10
calling url https://google.com
collectable found at iteration 0: (732, 10, 4)
collectable found at iteration 1: (739, 10, 4)
found 2 leaks in iterations [0, 1] (total iterations: 10)

aiohttp is not:

(virtual) marco@augusto:~/src/aiohttp$ python test_leak.py --url https://google.com --target single --loops 10
calling url https://google.com
collectable found at iteration 0: (3264, 10, 4)
collectable found at iteration 1: (4280, 10, 4)
collectable found at iteration 2: (5286, 10, 4)
collectable found at iteration 3: (6272, 10, 4)
collectable found at iteration 4: (7680, 10, 4)
collectable found at iteration 5: (9411, 10, 4)
collectable found at iteration 6: (10323, 10, 4)
collectable found at iteration 7: (11518, 10, 4)
collectable found at iteration 8: (12479, 10, 4)
collectable found at iteration 9: (14384, 10, 4)
found 10 leaks in iterations [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] (total iterations: 10)

But requests leaks as well...

(virtual) marco@augusto:~/src/aiohttp$ python test_leak.py --url https://google.com --target requests --loops 10
calling url https://google.com
collectable found at iteration 0: (7095, 10, 4)
collectable found at iteration 1: (7629, 10, 4)
collectable found at iteration 2: (8164, 10, 4)
collectable found at iteration 3: (8680, 10, 4)
collectable found at iteration 4: (9214, 10, 4)
collectable found at iteration 5: (9730, 10, 4)
collectable found at iteration 6: (10264, 10, 4)
collectable found at iteration 7: (10780, 10, 4)
collectable found at iteration 8: (11314, 10, 4)
collectable found at iteration 9: (11831, 10, 4)
found 10 leaks in iterations [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] (total iterations: 10)

gjj2828 (Author) commented Aug 2, 2016

@asvetlov
I want to simulate real client behavior: one session per client, with the session keeping that client's state.
If the issue is the cost of session creation, why doesn't it occur when I make plain HTTP requests?

gjj2828 (Author) commented Aug 2, 2016

@mpaolini
Thanks for your detailed tests!

zbagz commented Aug 2, 2016

I think I'm experiencing a similar problem. I'm new to this kind of memory issue, but I've run a few basic tests using mem_top and found that these keep increasing until the kernel kills my Python script for running out of memory:

references:
70463   <class 'list'> [<TimerHandle when=93546 CookieJar._expire_cookie(1472692833.0, 'server')>, <TimerHandle whe...

types:
70463    <class 'asyncio.events.TimerHandle'>

I'm not sure if this is related to aiohttp, or even to this specific issue you guys are reporting, so I apologize if this is off topic. I will try to create a small script to reproduce this problem and post it tomorrow.

asvetlov (Member) commented Aug 2, 2016

@zbagz CookieJar is part of aiohttp.
You see scheduled cookie deletions because the cookies returned by the server have an expiration period.
Honestly, I have no idea how to avoid it, and I'm not even sure we need to do anything about this.
In theory it's possible to invent a data structure with lower memory consumption; perhaps a single callback for all long-lived cookies would be enough.

If you don't need cookie processing at all, you can implement your own no-op jar and pass it into the session.
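
A rough sketch of that single-callback idea (hypothetical, not how aiohttp's CookieJar actually works; all names below are invented) would be one recurring expiry sweep shared by all cookies instead of one TimerHandle per cookie:

import time


class SweepingJarSketch:
    # Hypothetical sketch: a single recurring timer handles expiry for all
    # cookies, instead of scheduling one TimerHandle per cookie.

    SWEEP_INTERVAL = 60  # seconds between sweeps (arbitrary choice)

    def __init__(self, loop):
        self._loop = loop
        self._expirations = {}  # cookie key -> absolute expiry timestamp
        self._handle = loop.call_later(self.SWEEP_INTERVAL, self._sweep)

    def _expire_cookie(self, when, key):
        # Only record the expiry time; no extra TimerHandle is created.
        self._expirations[key] = when

    def _sweep(self):
        now = time.time()
        for key, when in list(self._expirations.items()):
            if when <= now:
                del self._expirations[key]
                # ...drop the actual cookie here...
        # Re-arm the single shared timer.
        self._handle = self._loop.call_later(self.SWEEP_INTERVAL, self._sweep)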

zbagz commented Aug 2, 2016

Thanks for your help @asvetlov. I followed your advice as I don't need any cookie processing at all. This solved my problem:

import asyncio
import aiohttp
from aiohttp.abc import AbstractCookieJar


class DummyJar(AbstractCookieJar):
    """No-op cookie jar: never stores cookies, so no expiry timers are scheduled."""

    def __init__(self, loop=None):
        super().__init__(loop=loop)

    def update_cookies(self, cookies, response_url=None):
        pass

    def filter_cookies(self, request_url):
        return None


loop = asyncio.get_event_loop()
dummy_jar = DummyJar(loop=loop)

with aiohttp.ClientSession(loop=loop, cookie_jar=dummy_jar) as session:
    ...
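
As a side note, newer aiohttp releases ship a built-in no-op jar, aiohttp.DummyCookieJar, so on recent versions the same effect shouldn't need a custom class (a sketch, assuming your aiohttp version provides it):

import aiohttp
import asyncio


async def main():
    # DummyCookieJar ignores all cookies, so no per-cookie expiry handles pile up.
    async with aiohttp.ClientSession(cookie_jar=aiohttp.DummyCookieJar()) as session:
        async with session.get('https://example.com') as resp:
            await resp.text()


asyncio.get_event_loop().run_until_complete(main())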

asvetlov (Member) commented Aug 2, 2016

@zbagz if you want to contribute your DummyJar to aiohttp (tests and a doc update are required) -- you are welcome.

asvetlov (Member)

Fixed by #1162

lock bot commented Oct 29, 2019

This thread has been automatically locked since there has not been
any recent activity after it was closed. Please open a new issue for
related bugs.

If you feel there are important points made in this discussion,
please include those excerpts in the new issue.
