使用 django/haystack/solr 实时更新占位符字段上的索引

Updating indexes on placeholderfields in real time with django/haystack/solr

您好,我正在将搜索集成到我的 django-cms 项目中。我做了一个 wiki 应用程序,其中每个页面的内容都存储在 PlaceholderField 中。最初我可以用 sudo ./manage.py rebuild_index 或 update_index 为 PlaceholderField 的内容建立索引,并且搜索工作正常。问题是当我修改 PlaceholderField 时,搜索索引没有更新,即使我已经在 settings.py 中设置了:

HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'

这是我的 model.py:

from django.db import models
from django.utils.text import slugify
from djangocms_text_ckeditor.fields import HTMLField
from cms.models.fields import PlaceholderField

def my_placeholder_slotname(instance):
    """Return the slot name used for the wiki page placeholder.

    ``instance`` is accepted but never consulted, so every caller gets
    the same slot name.
    """
    slot_name = 'content_placeholder'
    return slot_name


class WikiPage(models.Model):
    """A wiki page whose slug (the primary key) is derived from its name."""

    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)
    content = HTMLField(blank=True)
    section = models.ForeignKey(
        'WikiSection',
        related_name='pages',
        db_index=True,
    )
    content_placeholder = PlaceholderField(my_placeholder_slotname)

    def save(self, *args, **kwargs):
        """Recompute ``slug`` from ``name``, then delegate to the parent save."""
        self.slug = slugify(self.name)
        super(WikiPage, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the URL path of this page, built from its slug."""
        page_path = '/wiki/page/%s' % self.slug
        return page_path

    def __str__(self):
        """Return the page name as its string representation."""
        return self.name


class WikiSection(models.Model):
    """A wiki section whose slug (the primary key) is derived from its name."""

    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)

    def save(self, *args, **kwargs):
        """Recompute ``slug`` from ``name``, then delegate to the parent save."""
        self.slug = slugify(self.name)
        super(WikiSection, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the URL path of this section, built from its slug."""
        section_path = '/wiki/section/%s' % self.slug
        return section_path

    def __str__(self):
        """Return the section name as its string representation."""
        return self.name

这是我的 search_indexes.py:

import datetime
from haystack import indexes
from .models import WikiPage, WikiSection
from aldryn_search.helpers import get_cleaned_bits, get_request
from aldryn_search.utils import clean_join, get_index_base, strip_tags
from cms.models import CMSPlugin
from djangocms_text_ckeditor.cms_plugins import TextPlugin
from django.template import loader, Context


class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiPage``, including its placeholder plugin text."""

    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    content_placeholder = indexes.CharField()
    url = indexes.CharField()
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiPage

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the value stored in the ``url`` field for ``obj``."""
        return obj.get_absolute_url()

    def prepare_content_placeholder(self, obj):
        """Return the cleaned text of every plugin in the page's placeholder.

        Each plugin is rendered, reduced to cleaned text bits, and the bits
        of all plugins are joined with single spaces.
        """
        text_bits = []
        for plugin in obj.content_placeholder.get_plugins():
            text_bits.extend(get_cleaned_bits(plugin.render_plugin()))

        return clean_join(' ', text_bits)

    def prepare(self, obj):
        """Build the document for ``obj`` and render its ``text`` field.

        The ``text`` field is rendered from the wiki page template with the
        page itself and its placeholder text in the context.
        """
        data = super(WikiPageIndex, self).prepare(obj)

        template = loader.select_template(
            ("search/indexes/wiki_app/wikipage_text.txt", ),
        )
        data["text"] = template.render(Context({
            "object": obj,
            # super().prepare() has already run prepare_content_placeholder()
            # and stored the result under the field name, so reuse it rather
            # than rendering every plugin a second time.
            'placeholder': data['content_placeholder'],
        }))
        return data



class WikiSectionIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiSection`` objects."""

    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()

    class Meta:
        # NOTE(review): presumably a plain SearchIndex does not consult
        # this inner class -- confirm whether it is needed at all.
        model_name = WikiSection
        app_label = 'wiki'

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiSection

    def index_queryset(self, using=None):
        """Return all sections ordered by name for a full index rebuild."""
        manager = self.get_model().objects
        return manager.order_by('name')

    def prepare_url(self, obj):
        """Return the value stored in the ``url`` field for ``obj``."""
        section_url = obj.get_absolute_url()
        return section_url

我想请教如何编写更新方法,但我不知道该怎么写。我查阅了文档,看到可以扩展 update_object 和 update 方法,但不知道它们应该返回(return)什么。

编辑

我一直在反复调整,现在使用的是:

class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiPage`` that indexes placeholder plugin text."""

    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()
    content_placeholder = indexes.CharField(model_attr='content_placeholder')
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiPage

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the value stored in the ``url`` field for ``obj``."""
        return obj.get_absolute_url()

    def prepare_content_placeholder(self, obj):
        """Return the search text of every plugin in the page's placeholder.

        The per-plugin texts are joined with single spaces.
        """
        # The request does not depend on the plugin, so build it once
        # instead of once per plugin.
        request = get_request()
        text_bits = [
            self.get_plugin_search_text(plugin, request)
            for plugin in obj.content_placeholder.get_plugins()
        ]

        return clean_join(' ', text_bits)

    def get_plugin_search_text(self, base_plugin, request):
        """Return the search text of a single plugin as one string.

        NOTE(review): ``get_plugin_index_data`` is not defined in this
        class; presumably it is imported from ``aldryn_search.helpers``
        -- confirm the import is present in this module.
        """
        plugin_content_bits = get_plugin_index_data(base_plugin, request)
        return clean_join(' ', plugin_content_bits)

这是我的 wikipage_text.txt:

{{ object.name }} - Wiki Page (Section {{ object.section }})
Content: {{ object.get_placeholder_text }}

EDIT2

对于那些尝试使用下面答案中的代码的人,这是我为使其完全正常工作所做的改动。下面的所有代码保持不变,只是添加了一些东西。当插件被添加到占位符或从占位符中移除(added/removed)时,下面的解决方案会更新索引,但当插件被编辑(edited)时则不会。就我而言,当 djangocms-text-ckeditor 插件中的文本发生更改时,我需要索引随之更新。所需要的只是导入文本编辑器插件的模型(from djangocms_text_ckeditor.models import Text),然后为它再连接一个信号:

from djangocms_text_ckeditor.models import Text

# Also refresh the wiki page index whenever a Text plugin is saved.
signals.post_save.connect(
    receiver=update_wiki_page_index,
    sender=Text,
    dispatch_uid='post_save_update_wiki_page_index',
)

这样做的问题是,网站中的所有页面都有占位符,并且所有页面都可能包含文本,这意味着此信号会经常触发。为了避免 wiki.save(update_fields=['content_placeholder_data']) 对数据库进行不必要的调用,我只是先检查数据是否真的改变了,就像这样:

def update_wiki_page_index(**kwargs):
    """Signal receiver that refreshes a wiki page's stored placeholder text.

    Expects the signal's ``instance`` keyword argument to be a plugin.
    Returns without doing anything when the plugin is unsaved, has no
    placeholder, the placeholder does not belong to a ``WikiPage``, or the
    placeholder text is unchanged.

    Raises:
        KeyError: if ``instance`` is missing from ``kwargs``.
    """
    instance = kwargs['instance']
    if instance.pk is None:
        return

    placeholder = get_placeholder(plugin=instance)

    if placeholder is None:
        return

    try:
        wiki = WikiPage.objects.get(content_placeholder=placeholder)
    except WikiPage.DoesNotExist:
        return

    # Compute the placeholder text once and use it for both the change
    # check and the assignment; the page was looked up by this very
    # placeholder, so there is no need to compute it a second time.
    placeholder_data = get_placeholder_index_data(placeholder)

    # Make sure data has changed
    if wiki.content_placeholder_data == placeholder_data:
        return

    # DB based approach
    wiki.content_placeholder_data = placeholder_data

    # Saving here will trigger index update
    wiki.save(update_fields=['content_placeholder_data'])

当您添加或删除(add/delete)占位符中的插件时,搜索索引不会更新,因为 haystack 的信号只监听明确注册了搜索索引的模型,这意味着您必须为想要监听的每个插件都注册一个搜索索引。

更好和更简单的方法是进行自己的信号处理,但特定于插件,django-cms 在内部使用这些信号来做一些事情,所以这是一种常见的情况。

我附上了三个文件:models 和 search_indexes 是在您的代码基础上做了一些修改,helpers 则是新增的文件。

我将您的占位符名称函数改为返回(return)一个更具唯一性的值,并在其中添加了一些注释。

关于您的问题,有两种方法可以使用信号处理来解决。

一种是在每次插件被保存或删除(saved/deleted)时计算占位符数据,并将此数据存储在模型的一个字段中;随后当我们调用 save() 并通过 update_fields 只更新所需的字段时,haystack 的信号监听器会被触发,从而触发索引更新。当 haystack 更新索引时,它只需要读取数据库中的数据,而不必再次重新计算插件数据。

第二种方法是简单地从信号处理程序手动触发更新,然后这将告诉 haystack 更新搜索引擎中的 wiki 对象,就像您保存 wiki 对象一样。

我已经把两种解决方案都写在了代码里,并用注释将它们分开。如果 wiki 占位符会被频繁修改,那么我建议使用数据库方法,并为 haystack 的更新信号接入异步处理(需要 celery)。否则,直接使用手动更新即可。

免责声明 这个我没有亲自测试过,只是根据以前的经验写的:)

models.py

from django.db import models
from django.db.models import signals
from django.utils.text import slugify

from djangocms_text_ckeditor.fields import HTMLField

from cms.models import CMSPlugin
from cms.models.fields import PlaceholderField
from cms.signals.plugins import get_placeholder

from .helpers import (
    get_index_from_model,
    get_placeholder_index_data,
)


def get_wiki_placeholder_name(instance):
    """Build the placeholder slot name for a wiki page.

    Slot names are not UNIQUE at the database level, but embedding the
    page's primary key keeps them as distinct as possible, so a wiki's
    placeholder is easy to spot from its slot name.
    """
    page_pk = instance.pk
    return 'wiki_%s_placeholder' % page_pk


def update_wiki_page_index(**kwargs):
    """Signal receiver that re-indexes the wiki page owning a plugin.

    Expects the signal's ``instance`` keyword argument to be a plugin.
    Returns early when the plugin is unsaved, has no placeholder, or the
    placeholder does not belong to a ``WikiPage``.

    Two alternative strategies are shown one after the other; as written,
    both of them run.
    """
    plugin = kwargs['instance']
    if plugin.pk is None:
        return

    plugin_placeholder = get_placeholder(plugin=plugin)
    if plugin_placeholder is None:
        return

    try:
        wiki = WikiPage.objects.get(content_placeholder=plugin_placeholder)
    except WikiPage.DoesNotExist:
        return

    # DB based approach: store the placeholder text on the page itself.
    wiki.content_placeholder_data = get_placeholder_index_data(
        wiki.content_placeholder
    )
    # Saving here will trigger index update
    wiki.save(update_fields=['content_placeholder_data'])

    # OR

    # Realtime approach: ask the page to update its index entry directly.
    wiki.update_object_index()


class WikiPage(models.Model):
    """A wiki page whose slug (the primary key) is derived from its name.

    ``content_placeholder_data`` holds the placeholder text that the signal
    receiver in this module writes for the DB based indexing approach.
    """

    slug = models.SlugField(max_length=50,primary_key=True)
    name = models.CharField(max_length=50)
    content = HTMLField(blank=True)
    section = models.ForeignKey('WikiSection', related_name='pages', db_index=True)
    content_placeholder = PlaceholderField(get_wiki_placeholder_name)
    content_placeholder_data = models.TextField(editable=False)

    def __str__(self):
        """Return the page name as its string representation."""
        return self.name

    def save(self, *args, **kwargs):
        """Recompute ``slug`` from ``name``, then delegate to the parent save."""
        self.slug = slugify(self.name)
        super(WikiPage, self).save(*args, **kwargs)

    def update_object_index(self):
        """Update this page's entry in the search index, if it has an index."""
        # By default will update all cores associated with object
        index = get_index_from_model(self._meta.model)

        if index:
            # update_object takes a using='' parameter
            # if on a multi-language setup, you'll need to make sure
            # using reflects the language core
            #
            # Pass the page itself: nothing in this model defines a
            # ``model_instance`` attribute, so the previous
            # ``self.model_instance`` raised AttributeError.
            index.update_object(instance=self)

    def get_absolute_url(self):
        """Return the URL path of this page, built from its slug."""
        return '/wiki/page/%s' % self.slug


class WikiSection(models.Model):
    """A wiki section, keyed by a slug that is rebuilt from its name on save."""

    slug = models.SlugField(max_length=50, primary_key=True)
    name = models.CharField(max_length=50)

    def save(self, *args, **kwargs):
        """Set ``slug`` from the current ``name`` before saving."""
        self.slug = slugify(self.name)
        super(WikiSection, self).save(*args, **kwargs)

    def get_absolute_url(self):
        """Return the section's URL path, which ends with its slug."""
        url_path = '/wiki/section/%s' % self.slug
        return url_path

    def __str__(self):
        """Use the section name as the string representation."""
        return self.name


# Run the wiki page index update when a CMS plugin is about to be deleted.
signals.pre_delete.connect(
    receiver=update_wiki_page_index,
    sender=CMSPlugin,
    dispatch_uid='pre_delete_update_wiki_page_index',
)


# Run the wiki page index update after a CMS plugin has been saved.
signals.post_save.connect(
    receiver=update_wiki_page_index,
    sender=CMSPlugin,
    dispatch_uid='post_save_update_wiki_page_index',
)

search_indexes.py

from django.template import loader, Context

from haystack import indexes

from .helpers import get_placeholder_index_data
from .models import WikiPage, WikiSection


class WikiPageIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiPage``, including its placeholder text."""

    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    content_placeholder = indexes.CharField()
    url = indexes.CharField()
    _backend_alias = 'vcoe'

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiPage

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        return self.get_model().objects.order_by('name')

    def prepare_url(self, obj):
        """Return the value stored in the ``url`` field for ``obj``."""
        return obj.get_absolute_url()

    def prepare_content_placeholder(self, obj):
        """Return the placeholder text to index for ``obj``.

        Two alternatives are shown back to back; as written, the value of
        the realtime approach overwrites the DB one. Keep only the one you
        actually use.
        """
        # DB approach: read the text stored on the model.
        data = obj.content_placeholder_data

        # OR

        # Realtime approach: compute the text from the placeholder now.
        placeholder = obj.content_placeholder
        data = get_placeholder_index_data(placeholder)
        return data

    def prepare(self, obj):
        """Build the document for ``obj`` and render its ``text`` field.

        The ``text`` field is rendered from the wiki page template with the
        page itself and its placeholder text in the context.
        """
        data = super(WikiPageIndex, self).prepare(obj)

        template = loader.select_template(
            ("search/indexes/wiki_app/wikipage_text.txt", ),
        )
        data["text"] = template.render(Context({
            "object": obj,
            # super().prepare() has already run prepare_content_placeholder()
            # and stored the result under the field name, so reuse it rather
            # than computing the placeholder text a second time.
            'placeholder': data['content_placeholder'],
        }))
        return data


class WikiSectionIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``WikiSection`` objects."""

    text = indexes.CharField(document=True, use_template=True)
    title = indexes.CharField(model_attr='name')
    url = indexes.CharField()

    def get_model(self):
        """Return the model class covered by this index."""
        return WikiSection

    def index_queryset(self, using=None):
        """Return all sections ordered by name for a full index rebuild."""
        manager = self.get_model().objects
        return manager.order_by('name')

    def prepare_url(self, obj):
        """Return the value stored in the ``url`` field for ``obj``."""
        section_url = obj.get_absolute_url()
        return section_url

helpers.py

from haystack import connections
from haystack.constants import DEFAULT_ALIAS
from haystack.exceptions import NotHandled

from aldryn_search.helpers import get_plugin_index_data, get_request
from aldryn_search.utils import clean_join


def get_plugin_search_text(base_plugin, request):
    """Return the index data of one plugin joined into a single string.

    The bits returned by ``get_plugin_index_data`` for ``base_plugin`` and
    ``request`` are joined with single spaces.
    """
    return clean_join(' ', get_plugin_index_data(base_plugin, request))


def get_placeholder_index_data(placeholder):
    """Return the search text of every plugin in ``placeholder``.

    One request object is created up front and shared by all plugins; the
    per-plugin texts are joined with single spaces.
    """
    request = get_request()
    text_bits = [
        get_plugin_search_text(plugin, request)
        for plugin in placeholder.get_plugins()
    ]
    return clean_join(' ', text_bits)


def get_index_from_model(model_class):
    """Return the search index registered for ``model_class``, or ``None``.

    The lookup goes through the unified index of the ``DEFAULT_ALIAS``
    connection. A found index gets its ``_backend_alias`` set to
    ``DEFAULT_ALIAS`` before it is returned.
    """
    # Notice I'm explicitly using DEFAULT_ALIAS here
    # on a multi-language setup, you'll have to get the alias
    # from current language.
    default_connection = connections[DEFAULT_ALIAS]
    unified_index = default_connection.get_unified_index()

    try:
        found_index = unified_index.get_index(model_class)
    except NotHandled:
        # No index is registered for this model.
        return None

    found_index._backend_alias = DEFAULT_ALIAS
    return found_index