Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version: use field validator instead of custom field for slug #11957

Merged
merged 4 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion readthedocs/builds/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import taggit.managers
from django.db import migrations, models
from django_safemigrate import Safe

import readthedocs.builds.version_slug


class Migration(migrations.Migration):
safe = Safe.always
dependencies = [
("projects", "0001_initial"),
("taggit", "0001_initial"),
Expand Down Expand Up @@ -146,7 +148,6 @@ class Migration(migrations.Migration):
(
"slug",
readthedocs.builds.version_slug.VersionSlugField(
populate_from=b"verbose_name",
max_length=255,
verbose_name="Slug",
db_index=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Generated by Django 1.9.13 on 2018-10-17 04:20
from django.db import migrations, models
from django_safemigrate import Safe

import readthedocs.builds.version_slug


class Migration(migrations.Migration):
safe = Safe.always
dependencies = [
("builds", "0004_add-apiversion-proxy-model"),
]
Expand Down Expand Up @@ -77,7 +79,6 @@ class Migration(migrations.Migration):
field=readthedocs.builds.version_slug.VersionSlugField(
db_index=True,
max_length=255,
populate_from="verbose_name",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it OK to edit an old migration?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, we don't depend on anything here, and it's already applied. We should squash these to remove the old field completely. #9510

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏼 on squashing them

verbose_name="Slug",
),
),
Expand Down
33 changes: 33 additions & 0 deletions readthedocs/builds/migrations/0060_alter_version_slug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 4.2.17 on 2025-01-27 18:32

import django.core.validators
from django.db import migrations, models
from django_safemigrate import Safe


class Migration(migrations.Migration):

safe = Safe.after_deploy

dependencies = [
("builds", "0059_add_version_date_index"),
]

operations = [
migrations.AlterField(
model_name="version",
name="slug",
field=models.CharField(
db_index=True,
help_text="A unique identifier used in the URL and links for this version. It can contain lowercase letters, numbers, dots, dashes or underscores. It must start with a lowercase letter or a number.",
max_length=255,
validators=[
django.core.validators.RegexValidator(
message="Enter a valid slug consisting of lowercase letters, numbers, dots, dashes or underscores. It must start with a letter or a number.",
regex="^(?:[a-z0-9a-z][-._a-z0-9a-z]*?)$",
)
],
verbose_name="Slug",
),
),
]
18 changes: 15 additions & 3 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@
get_gitlab_username_repo,
get_vcs_url,
)
from readthedocs.builds.version_slug import VersionSlugField
from readthedocs.builds.version_slug import (
generate_unique_version_slug,
version_slug_validator,
)
from readthedocs.core.utils import extract_valid_attributes_for_model, trigger_build
from readthedocs.notifications.models import Notification
from readthedocs.projects.constants import (
Expand Down Expand Up @@ -118,10 +121,14 @@ class Version(TimeStampedModel):
#: in the URL to identify this version in a project. It's also used in the
#: filesystem to determine how the paths for this version are called. It
#: must not be used for any other identifying purposes.
slug = VersionSlugField(
slug = models.CharField(
_("Slug"),
max_length=255,
populate_from="verbose_name",
validators=[version_slug_validator],
db_index=True,
help_text=_(
"A unique identifier used in the URL and links for this version. It can contain lowercase letters, numbers, dots, dashes or underscores. It must start with a lowercase letter or a number."
),
)

# TODO: this field (`supported`) could be removed. It's returned only on
Expand Down Expand Up @@ -415,6 +422,11 @@ def clean_resources(self):
self.built = False
self.save()

def save(self, *args, **kwargs):
if not self.slug:
self.slug = generate_unique_version_slug(self.verbose_name, self)
super().save(*args, **kwargs)

def post_save(self, was_active=False):
"""
Run extra steps after updating a version.
Expand Down
253 changes: 84 additions & 169 deletions readthedocs/builds/version_slug.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,14 @@
import string
from operator import truediv

from django.core.validators import RegexValidator
from django.db import models
from django.utils.encoding import force_str
from django.utils.translation import gettext_lazy as _
from slugify import slugify as unicode_slugify


def get_fields_with_model(cls):
"""
Replace deprecated function of the same name in Model._meta.

This replaces deprecated function (as of Django 1.10) in Model._meta as
prescrived in the Django docs.
https://docs.djangoproject.com/en/1.11/ref/models/meta/#migrating-from-the-old-api
"""
return [
(f, f.model if f.model != cls else None)
for f in cls._meta.get_fields()
if not f.is_relation or f.one_to_one or (f.many_to_one and f.related_model)
]
class VersionSlugField(models.CharField):
"""Just for backwards compatibility with old migrations."""


# Regex breakdown:
Expand All @@ -50,164 +40,89 @@ def get_fields_with_model(cls):
# regexes.
VERSION_SLUG_REGEX = "(?:[a-z0-9A-Z][-._a-z0-9A-Z]*?)"

version_slug_validator = RegexValidator(
# NOTE: we use the lower case version of the regex here,
# since slugs are always lower case,
# maybe we can change the VERSION_SLUG_REGEX itself,
# but that may be a breaking change somewhere else...
regex=f"^{VERSION_SLUG_REGEX.lower()}$",
message=_(
"Enter a valid slug consisting of lowercase letters, numbers, dots, dashes or underscores. It must start with a letter or a number."
),
)


def generate_unique_version_slug(source, version):
slug = generate_version_slug(source) or "unknown"
queryset = version.project.versions.all()
if version.pk:
queryset = queryset.exclude(pk=version.pk)
base_slug = slug
iteration = 0
while queryset.filter(slug=slug).exists():
suffix = _uniquifying_suffix(iteration)
slug = f"{base_slug}_{suffix}"
iteration += 1
return slug


def generate_version_slug(source):
normalized = _normalize(source)
ok_chars = "-._" # dash, dot, underscore
slugified = unicode_slugify(
normalized,
only_ascii=True,
spaces=False,
lower=True,
ok=ok_chars,
space_replacement="-",
)
# Remove first character wile it's an invalid character for the beginning of the slug.
slugified = slugified.lstrip(ok_chars)
return slugified


def _normalize(value):
"""
Normalize some invalid characters (/, %, !, ?) to become a dash (``-``).

class VersionSlugField(models.CharField):
.. note::

We replace these characters to a dash to keep compatibility with the
old behavior and also because it makes this more readable.

For example, ``release/1.0`` will become ``release-1.0``.
"""
Inspired by ``django_extensions.db.fields.AutoSlugField``.
return re.sub("[/%!?]", "-", value)


Uses ``unicode-slugify`` to generate the slug.
def _uniquifying_suffix(iteration):
"""
Create a unique suffix.

ok_chars = "-._" # dash, dot, underscore
test_pattern = re.compile("^{pattern}$".format(pattern=VERSION_SLUG_REGEX))
fallback_slug = "unknown"

def __init__(self, *args, **kwargs):
kwargs.setdefault("db_index", True)

populate_from = kwargs.pop("populate_from", None)
if populate_from is None:
raise ValueError("missing 'populate_from' argument")

self._populate_from = populate_from
super().__init__(*args, **kwargs)

def get_queryset(self, model_cls, slug_field):
for field, model in get_fields_with_model(model_cls):
if model and field == slug_field:
return model._default_manager.all()
return model_cls._default_manager.all()

def _normalize(self, content):
"""
Normalize some invalid characters (/, %, !, ?) to become a dash (``-``).

.. note::

We replace these characters to a dash to keep compatibility with the
old behavior and also because it makes this more readable.

For example, ``release/1.0`` will become ``release-1.0``.
"""
return re.sub("[/%!?]", "-", content)

def slugify(self, content):
"""
Make ``content`` a valid slug.

It uses ``unicode-slugify`` behind the scenes which works properly with
Unicode characters.
"""
if not content:
return ""

normalized = self._normalize(content)
slugified = unicode_slugify(
normalized,
only_ascii=True,
spaces=False,
lower=True,
ok=self.ok_chars,
space_replacement="-",
)

# Remove first character wile it's an invalid character for the
# beginning of the slug
slugified = slugified.lstrip(self.ok_chars)

if not slugified:
return self.fallback_slug
return slugified

def uniquifying_suffix(self, iteration):
"""
Create a unique suffix.

This creates a suffix based on the number given as ``iteration``. It
will return a value encoded as lowercase ascii letter. So we have an
alphabet of 26 letters. The returned suffix will be for example ``_yh``
where ``yh`` is the encoding of ``iteration``. The length of it will be
``math.log(iteration, 26)``.

Examples::

uniquifying_suffix(0) == '_a'
uniquifying_suffix(25) == '_z'
uniquifying_suffix(26) == '_ba'
uniquifying_suffix(52) == '_ca'
"""
alphabet = string.ascii_lowercase
length = len(alphabet)
if iteration == 0:
power = 0
else:
power = int(math.log(iteration, length))
current = iteration
suffix = ""
for exp in reversed(list(range(0, power + 1))):
digit = int(truediv(current, length**exp))
suffix += alphabet[digit]
current = current % length**exp
return "_{suffix}".format(suffix=suffix)

def create_slug(self, model_instance):
"""Generate a unique slug for a model instance."""

# get fields to populate from and slug field to set
slug_field = model_instance._meta.get_field(self.attname)

slug = self.slugify(getattr(model_instance, self._populate_from))
count = 0

# strip slug depending on max_length attribute of the slug field
# and clean-up
slug_len = slug_field.max_length
if slug_len:
slug = slug[:slug_len]
original_slug = slug

# exclude the current model instance from the queryset used in finding
# the next valid slug
queryset = self.get_queryset(model_instance.__class__, slug_field)
if model_instance.pk:
queryset = queryset.exclude(pk=model_instance.pk)

# form a kwarg dict used to implement any unique_together constraints
kwargs = {}
for params in model_instance._meta.unique_together:
if self.attname in params:
for param in params:
kwargs[param] = getattr(model_instance, param, None)
kwargs[self.attname] = slug

# increases the number while searching for the next valid slug
# depending on the given slug, clean-up
while not slug or queryset.filter(**kwargs).exists():
slug = original_slug
end = self.uniquifying_suffix(count)
end_len = len(end)
if slug_len and len(slug) + end_len > slug_len:
slug = slug[: slug_len - end_len]
slug = slug + end
kwargs[self.attname] = slug
count += 1

is_slug_valid = self.test_pattern.match(slug)
if not is_slug_valid:
# pylint: disable=broad-exception-raised
raise Exception("Invalid generated slug: {slug}".format(slug=slug))
return slug

def pre_save(self, model_instance, add):
value = getattr(model_instance, self.attname)
# We only create a new slug if none was set yet.
if not value and add:
value = force_str(self.create_slug(model_instance))
setattr(model_instance, self.attname, value)
return value

def deconstruct(self):
name, path, args, kwargs = super().deconstruct()
kwargs["populate_from"] = self._populate_from
return name, path, args, kwargs
This creates a suffix based on the number given as ``iteration``. It
will return a value encoded as lowercase ascii letter. So we have an
alphabet of 26 letters. The returned suffix will be for example ``yh``
where ``yh`` is the encoding of ``iteration``. The length of it will be
``math.log(iteration, 26)``.

Examples::

uniquifying_suffix(0) == 'a'
uniquifying_suffix(25) == 'z'
uniquifying_suffix(26) == 'ba'
uniquifying_suffix(52) == 'ca'
"""
alphabet = string.ascii_lowercase
length = len(alphabet)
if iteration == 0:
power = 0
else:
power = int(math.log(iteration, length))
current = iteration
suffix = ""
for exp in reversed(list(range(0, power + 1))):
digit = int(truediv(current, length**exp))
suffix += alphabet[digit]
current = current % length**exp
return suffix
Loading