skip to Main Content

Based on other posts I understand Django doesn’t have a memory leak issue, but I have a web application where, when a specific routine is called, a lot of memory gets used and not all of it is freed up afterwards. I don’t know if that is the correct terminology, but if I track mem_used_perc on AWS while only calling this routine on the webpage, I see the memory usage increase and not return to previous levels.

It is a recursive routine that I call which can iterate up to 7 times. This is the code:

def autosearch(self, phase=1, report="", num=10):
    """
    Populate ``self.referred_to`` from an Elasticsearch organisation search.

    The search runs in phases. Each phase builds a query from the lead's
    attributes, adds the organisations that are not yet referred, and - if
    fewer than ``num`` were added and the phase is below 7 - calls itself
    for the next phase with the remaining count.

    Args:
        phase: Phase number. Phase 1 clears ``referred_to`` and resets
            ``num`` from ``self.no_of_providers`` (falling back to 10).
        report: HTML report text accumulated by earlier phases.
        num: Number of organisations still wanted.

    Returns:
        True, once a phase has stored the report and saved the lead.

    Raises:
        IndexError: if the lead has no search area (``searcharea_set`` is
            indexed at 0).
    """
    if phase == 1:
        self.referred_to.clear()
        num = self.no_of_providers if self.no_of_providers else 10

    sqs = OrganisationDocument.search()

    # filter by care type
    service_type = None
    if self.type_of_care_care_home:
        service_type = "service_care_home"
    elif self.type_of_care_home_care:
        service_type = "service_home_care"
    elif self.type_of_care_live_in_care:
        service_type = "service_live_in_care"
    elif self.type_of_care_retirement_village:
        service_type = "service_retirement_village"

    if service_type == "service_retirement_village":
        sqs = sqs.query(Q("multi_match", query=True, fields=service_type))
    elif service_type:
        sqs = sqs.query(
            Q("multi_match", query=True, fields=service_type)
            & Q("match", care_over_65=True)
        )
    else:
        sqs = sqs.query(Q("match", care_over_65=True))

    # filter on budget (the original added this identical clause twice;
    # once is enough to restrict the result set)
    if self.budget_type:
        ranges = self.filter_by_budget_range(phase)
        sqs = sqs.query(Q("bool", should=list(ranges)))

    # filter on location and distance
    radius = self.radius if self.radius else 5

    # Increase radius by 2 (below 20) or by 10% (20 and above).
    # NOTE(review): the original comment said "phase 2, 5, and 6" but the
    # code only widens the radius in phases 2 and 6 - confirm which is meant.
    if phase in [2, 6]:
        if radius < 20:
            radius += 2
        else:
            radius *= 1.1

    # Fetch the search area once instead of running the query twice.
    search_area = self.searcharea_set.all()[0]
    sqs = sqs.query(
        "geo_distance",
        distance=f"{radius}mi",
        location={
            "lat": search_area.lat,
            "lon": search_area.lng,
        },
    )

    # Filter by care_category_type.
    # These care needs intentionally add no category filter:
    # care_need_category_nursing_dementia, care_need_palliative,
    # care_need_end_of_life, care_need_retirement_full_time.
    category_flags = (
        ("care_need_category_residential", "care_residential"),
        ("care_need_category_nursing", "care_nursing"),
        ("care_need_category_dementia", "care_dementia"),
        ("care_need_category_personal_care", "care_residential"),
        ("care_need_category_respite_care", "care_respite"),
        ("care_need_retirement_housing", "retirement_living_housing"),
        ("care_need_retirement_village", "retirement_living_village"),
        ("care_need_retirement_community", "retirement_living_community"),
    )
    categories = [
        category for flag, category in category_flags if getattr(self, flag)
    ]

    home_based = self.type_of_care_live_in_care or self.type_of_care_home_care
    query = []
    for category in categories:
        # Home-based care matches on the regulated activity instead.
        if home_based and category == "care_residential":
            category = "regulated_personal_care"

        if category == "care_nursing":
            query.append(
                Q(
                    Q("match", regulated_nursing_care=True)
                    | Q("match", care_nursing=True)
                )
            )
        else:
            query.append(Q("match", **{category: True}))

    if self.type_of_care_retirement_village:
        sqs = sqs.query("bool", should=list(query))
    else:
        sqs = sqs.filter(Q("bool", must=query))

    # CQC Regulator filter
    sqs = sqs.query(
        Q(
            Q("match", cqc_rating_overall=1)
            | Q("match", cqc_rating_overall=2)
            | Q("match", cqc_rating_overall=3)
            | Q("match", cqc_rating_overall=99)
        )
    )

    # filter on profile
    if phase >= 4:
        sqs = sqs.query(Q("match", has_thumbnail_image=1))
        # Exclude Standard profiles in Brand with Premium Profile
        prems = [x.id for x in self.referred_to.all() if x.is_premium]
        sqs = sqs.query(~Q('bool', brand_link=list(prems)))
    else:
        sqs = sqs.query(Q("match", is_premium=1))

    # funding method
    if self.funding_method == choices.LOCAL_AUTHORITY:
        sqs = sqs.query(~Q("match", fees_local_authority_funded=False))
    elif self.funding_method == choices.SELF_FUNDING:
        sqs = sqs.query("match", self_funding_clients=True)
    elif self.funding_method == choices.CONTINUING_HEALTHCARE:
        sqs = sqs.query(Q("match", fees_continuous_health_top_up=True))
    elif self.funding_method == choices.TOP_UP:
        sqs = sqs.query(~Q("match", fees_family_top_up=False))

    # Red crossed
    sqs = sqs.query(~Q("match", autumna_flag=2))
    # amber flagged
    if phase < 7:
        sqs = sqs.query(~Q("match", autumna_flag=1))

    # email only
    if not self.may_contact_provider:
        sqs = sqs.query(~Q("match", leads_accepted=1))
        sqs = sqs.query(~Q("match", leads_accepted=2))

    # no permission
    if not self.may_contact_phone:
        sqs = sqs.query(~Q("match", leads_accepted=1))

    # timescales
    if self.timescales in ["ASAP", "2-4 weeks"]:
        sqs = sqs.query(
            Q(
                Q("match", availability_overall=1)
                | Q("match", availability_overall=2)
            )
        )

    # Search.sort() returns a modified copy; the original discarded it, so
    # the requested ordering was never applied.
    sqs = sqs.sort(
        "-is_premium",
        "-has_thumbnail_image",
        "-is_claimed",
        self.sort_geo_location_dict(),
    )
    sqs = sqs[:num]

    # Load the already-referred ids once rather than re-querying
    # ``referred_to`` for every search hit.
    referred_ids = set(self.referred_to.values_list("pk", flat=True))

    report += f"<p>Phase {phase}:"
    added_count = 0
    for organisation in sqs.to_queryset():
        if organisation.pk not in referred_ids:
            added_count += 1
            self.referred_to.add(organisation)
            referred_ids.add(organisation.pk)
            report += f"{organisation}, "
    report += "</p>"

    if added_count >= num or phase >= 7:
        self.autosearch_interim_report = report
        self.save()
        return True

    # Not enough results yet: relax the rules in the next phase and look
    # only for the number still missing.
    return self.autosearch(phase + 1, report, num - added_count)

Is there any reason this should cause the symptoms I am seeing and how do I fix it?

EDIT

The function is invoked from admin with the following:

def autosearch(self, request, pk):
    """
    Admin action: run the lead's autosearch, then return to its change page.

    Looks up the ``Lead`` with primary key ``pk`` (404 if it does not exist),
    calls ``lead.autosearch()``, logs the result at debug level and redirects
    to the admin change view for the same lead.
    """
    lead = get_object_or_404(models.Lead, pk=pk)

    # Run the search that fills in the organisations for this lead.
    outcome = lead.autosearch()
    logger.debug(f'Auto search result: {outcome}')

    change_url = reverse("admin:lead_management_lead_change", args=(pk,))
    return redirect(change_url)

EDIT

I am using Memcache which is defined as follows:

# Django cache configuration: a single "default" cache backed by Memcached.
# NOTE(review): the MemcachedCache (python-memcached) backend was deprecated in
# Django 3.2 and removed in 4.1 - confirm the Django version in use.
CACHES = {
    "default": {
        "BACKEND": "django.core.cache.backends.memcached.MemcachedCache",
        # Server address as "host:port"; read from the CACHE_LOCATION
        # environment variable, falling back to a Memcached on localhost.
        "LOCATION": os.environ.get("CACHE_LOCATION", "127.0.0.1:11211"),
    }
}

So locally I run it on the machine but in production I am using an environment variable in Elastic Beanstalk to define CACHE_LOCATION which is a string something like: my-site-name-prod.abcdef.cfg.euw2.cache.amazonaws.com:11211

EDIT

def filter_by_budget_range(self, phase):
    """
    Yield the Elasticsearch ``range`` clause matching the lead's budget.

    The bounds come from ``self.min_max_handler(phase)``. Nothing is yielded
    when the upper bound is ``None``; otherwise exactly one clause is yielded,
    with both bounds multiplied by 100. Live-in care and care-home leads are
    matched on ``fees_weekly_residential_costs_from``, all others on
    ``fees_hourly_start_from``.

    Budget rules as documented for this function (the derivation itself is
    not visible here - presumably done by ``min_max_handler``):
        - min and max budget both given: filter organisations in that range.
        - only max given: min = max - max * 0.2 (e.g. max 100 -> min 80).
        - only min given: max = min + min * 0.2 (e.g. min 100 -> max 120).
    """
    lower, upper = self.min_max_handler(phase)
    if upper is None:
        return

    if self.type_of_care_live_in_care or self.type_of_care_care_home:
        fee_field = "fees_weekly_residential_costs_from"
    else:
        fee_field = "fees_hourly_start_from"

    yield {"range": {fee_field: {"gte": lower * 100, "lte": upper * 100}}}

2

Answers


  1. Does it help if you manually run gc.collect in your code?

    https://docs.python.org/3/library/gc.html

    collect(): This method frees the non-referenced objects in the list that is maintained by the collector. Some non-referenced objects are not freed immediately and automatically because of how they are implemented.

    ==============================================

    Are you sure your issue is with the memory not being freed up again?
    Are you having any memory issues?

    On Linux it is typical for memory to remain in use even after the program has freed it, since what good is your RAM if you keep it empty all the time?
    The OS will keep things cached in RAM even if your application doesn’t really need it at the moment, because it might benefit from increased performance when later on the application wants that data again and it is still in your ram.

    This is called: caching

    I tried looking up the documentation of the mem_used[_percent] call and it was not really clear to me if that meant in use memory (by applications + caches) or memory used by applications
    https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/metrics-collected-by-CloudWatch-agent.html

    Do you have any issues with free memory? is your server going out of memory?

    e.g. is mem_available and mem_free going back up after the call is done or not?

    What you can typically see is memory being used by an application, then freed, but cached by the OS.

    So mem_free goes back up, but the data is cached in memory, so mem_cached goes up and mem_used stays the same.

    This could explain what you are seeing without having a memory leak.

    e.g. on my current system, when I do cat /proc/meminfo I see almost no free memory, but most of it (68%) is taken up by caches which will be cleared as soon as an application needs memory.

       1   │ MemTotal:       16068712 kB
       2   │ MemFree:          178332 kB
       3   │ MemAvailable:   10290764 kB
       4   │ Buffers:          812504 kB
       5   │ Cached:         10203120 kB
       6   │ SwapCached:         1596 kB
       7   │ Active:          4852664 kB
       8   │ Inactive:       10034536 kB
    
    Login or Signup to reply.
  2. There is nothing in the provided code that could explain a memory leak.

    The issue must come from somewhere else (possibly self.filter_by_budget_range(phase)).

    More often than not, memory leaks in Django would come from side-effects when using objects that are created at server startup, and that you keep feeding with new data without even realizing it, or without being aware that the object is bound to the server and not to single requests.

    For instance, if you have something like that:

    class Foobar(models.Model):
        ...
        # Class-level mutable attribute: a single list object shared by every
        # instance for as long as the server process lives.
        baz = []
        ...

        def barfoo(self, baz):
            ...
            # Mutates the shared class-level list in place, so it accumulates
            # across requests. (With an immutable str, ``self.baz += baz``
            # would only rebind an instance attribute and nothing would
            # build up on the class.)
            self.baz.append(baz)
            ...
    

    For every request where you call obj.barfoo(some_string), Foobar.baz will keep growing until the server is restarted.

    Similarly, in the following example:

    def foobar(baz=[]):
        # Deliberate anti-pattern, kept for illustration: the default list is
        # created once, when the ``def`` statement runs, and is shared by every
        # call that omits ``baz``.
        ...
        baz.append(something)  # grows the shared default list on each such call
        ...
    

    Where the function foobar is created once at server startup. Every time you call foobar() with the default argument, baz keeps growing until the server is restarted.

    These two examples are of course silly, it’s just to show simple cases of side-effects affecting the memory.


    Another possible issue would be if you cache some stuff using a memory-based backend such as Memcached.


    If you have no idea what it could be, your best bet would probably be to try to reproduce the issue in development and use pdb (or even django-pdb) to inspect the memory, either while running the development server or directly in the shell (which could be more handy if you can reproduce in the shell).

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search