Simple Django module to anonymize production data for safe usage on non-production environments.
pip install django-anonymous
In your app, create a file `anon.py`:
from django_anonymous import Anonymizer, Faker, register
from .model import YourModel
@register(YourModel)
class YourModelAnonymizer(Anonymizer):
# You can give any callable, Faker is a small wrapper around the `faker` library.
email = Faker("email", unique=True)
# You can also use any static value
first_name = "Anon"
Run the anonymizer:
python manage.py anonymize
You can override `get_queryset` to return a custom QuerySet that restricts which objects are anonymized:
from django_anonymous import Anonymizer, Faker, register
from .model import YourModel
@register(YourModel)
class YourModelAnonymizer(Anonymizer):
email = Faker("email", unique=True)
def get_queryset(self):
return super().get_queryset().filter(is_staff=True)
By default, the object id is used as the seed, so the same data is generated on every run.
You can disable this by overriding `get_object_seed`
and returning a falsy value:
from django_anonymous import Anonymizer, Faker, register
from .model import YourModel
@register(YourModel)
class YourModelAnonymizer(Anonymizer):
email = Faker("email", unique=True)
def get_object_seed(self, obj):
return None
Per Anonymizer, you can set the select chunk size (`SELECT_CHUNK_SIZE`) and the update batch size (`UPDATE_BATCH_SIZE`). By default, a field that has no value is not anonymized (`ANONYMIZE_EMPTY_FIELD = False`):
from django_anonymous import Anonymizer, Faker, register
from .model import YourModel
@register(YourModel)
class YourModelAnonymizer(Anonymizer):
SELECT_CHUNK_SIZE = 100
UPDATE_BATCH_SIZE = 25
ANONYMIZE_EMPTY_FIELD = False
email = Faker("email", unique=True)