SQLAlchemy

Question

如果这个问题已经在其他地方得到回答，请原谅我。我一直在搜索 SO，但无法将看似相关的问答翻译成我的场景。

我正在做一个有趣的个人项目，其中有 4 个主要模式（暂时排除关系）：

角色（姓名、简介）
剧集（标题、情节）
剪辑（url，时间戳）
图片（url）

限制（关系基础）：

角色可以出现在多个剧集中，以及这些剧集中的多个剪辑和图像（但可能并非所有 clips/images 都与一个剧集相关）。
一集可以包含多个角色、剪辑和图像。
一个Image/Clip只能与一个剧集相关，但可以与多个角色相关。
如果角色已经分配给剧集，那么分配给角色的任何 clip/image 只能来自其中一集，或者（如果是新的）必须只能拥有其中一集该角色出现的剧集与 clip/image.
如果一集已经分配了角色，则分配给该剧集的任何 clip/image 必须与这些角色中的至少一个相关，或者（如果是新角色）必须只能拥有一个或多个角色来自与 clip/image.

我设计的数据库结构是这样的：

这会生成以下 sql：

DROP TABLE IF EXISTS episodes;
DROP TABLE IF EXISTS personas;
DROP TABLE IF EXISTS personas_episodes;
DROP TABLE IF EXISTS clips;
DROP TABLE IF EXISTS personas_clips;
DROP TABLE IF EXISTS images;
DROP TABLE IF EXISTS personas_images;


CREATE TABLE episodes (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(120) NOT NULL UNIQUE,
plot TEXT,
tmdb_id VARCHAR(10) NOT NULL,
tvdb_id VARCHAR(10) NOT NULL,
imdb_id VARCHAR(10) NOT NULL);

CREATE TABLE personas (
id INT NOT NULL PRIMARY KEY,
name VARCHAR(30) NOT NULL,
bio TEXT NOT NULL);

CREATE TABLE personas_episodes (
persona_id INT NOT NULL,
episode_id INT NOT NULL,
PRIMARY KEY (persona_id,episode_id),
FOREIGN KEY(persona_id) REFERENCES personas(id),
FOREIGN KEY(episode_id) REFERENCES episodes(id));

CREATE TABLE clips (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(100) NOT NULL,
timestamp VARCHAR(7) NOT NULL,
link VARCHAR(100) NOT NULL,
episode_id INT NOT NULL,
FOREIGN KEY(episode_id) REFERENCES episodes(id));

CREATE TABLE personas_clips (
clip_id INT NOT NULL,
persona_id INT NOT NULL,
PRIMARY KEY (clip_id,persona_id),
FOREIGN KEY(clip_id) REFERENCES clips(id),
FOREIGN KEY(persona_id) REFERENCES personas(id));

CREATE TABLE images (
id INT NOT NULL PRIMARY KEY,
link VARCHAR(120) NOT NULL UNIQUE,
path VARCHAR(120) NOT NULL UNIQUE,
episode_id INT NOT NULL,
FOREIGN KEY(episode_id) REFERENCES episodes(id));

CREATE TABLE personas_images (
persona_id INT NOT NULL,
image_id INT NOT NULL,
PRIMARY KEY (persona_id,image_id),
FOREIGN KEY(persona_id) REFERENCES personas(id),
FOREIGN KEY(image_id) REFERENCES images(id));

并且我尝试在 SQLAchemy 模型中创建相同的模式（记住 SQLite 用于测试，PostgreSQL 用于生产），如下所示：

# db is a configured Flask-SQLAlchemy instance
from app import db
# Alias common SQLAlchemy names
Column = db.Column
relationship = db.relationship


class PkModel(Model):
    """Base model class that adds a 'primary key' column named ``id``."""
 
    __abstract__ = True
    id = Column(db.Integer, primary_key=True)
 
 
def reference_col(
    tablename, nullable=False, pk_name="id", foreign_key_kwargs=None, column_kwargs=None
):
    """Column that adds primary key foreign key reference.
 
    Usage: ::
 
        category_id = reference_col('category')
        category = relationship('Category', backref='categories')
    """
    foreign_key_kwargs = foreign_key_kwargs or {}
    column_kwargs = column_kwargs or {}
 
    return Column(
        db.ForeignKey(f"{tablename}.{pk_name}", **foreign_key_kwargs),
        nullable=nullable,
        **column_kwargs,
    )

personas_episodes = db.Table(
    "personas_episodes",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("episode_id", db.ForeignKey("episodes.id"), primary_key=True),
)
 
personas_clips = db.Table(
    "personas_clips",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("clip_id", db.ForeignKey("clips.id"), primary_key=True),
)
 
personas_images = db.Table(
    "personas_images",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("image_id", db.ForeignKey("images.id"), primary_key=True),
)
 
 
class Persona(PkModel):
    """One of Roger's personas."""
 
    __tablename__ = "personas"
    name = Column(db.String(80), unique=True, nullable=False)
    bio = Column(db.Text)
    # relationships
    episodes = relationship("Episode", secondary=personas_episodes, back_populates="personas")
    clips = relationship("Clip", secondary=personas_clips, back_populates="personas")
    images = relationship("Image", secondary=personas_images, back_populates="personas")
 
    def __repr__(self):
        """Represent instance as a unique string."""
        return f"<Persona({self.name!r})>"
 
 
class Image(PkModel):
    """An image of one of Roger's personas from an episode of American Dad."""
    
    __tablename__ = "images"
    link = Column(db.String(120), unique=True)
    path = Column(db.String(120), unique=True)
    episode_id = reference_col("episodes")
    # relationships
    personas = relationship("Persona", secondary=personas_images, back_populates="images")
    
 
 
class Episode(PkModel):
    """An episode of American Dad."""
    
    # FIXME: We can add Clips and Images linked to Personas that are not assigned to this episode
 
    __tablename__ = "episodes"
    title = Column(db.String(120), unique=True, nullable=False)
    plot = Column(db.Text)
    tmdb_id = Column(db.String(10))
    tvdb_id = Column(db.String(10))
    imdb_id = Column(db.String(10))
    # relationships
    personas = relationship("Persona", secondary=personas_episodes, back_populates="episodes")
    images = relationship("Image", backref="episode")
    clips = relationship("Clip", backref="episode")
 
    def __repr__(self):
        """Represent instance as a unique string."""
        return f"<Episode({self.title!r})>"
 
 
class Clip(PkModel):
    """A clip from an episode of American Dad that contains one or more of Roger's personas."""
 
    __tablename__ = "clips"
    title = Column(db.String(80), unique=True, nullable=False)
    timestamp = Column(db.String(7), nullable=True)  # 00M:00S
    link = Column(db.String(7), nullable=True)
    episode_id = reference_col("episodes")
    # relationships
    personas = relationship("Persona", secondary=personas_clips, back_populates="clips")

但是，请注意 FIXME 注释。我无法弄清楚如何限制角色+图像、角色+剪辑和角色+剧集中的 many-to-many 关系，使它们在添加新条目以限制可能的添加之前相互查看到满足其他 many-to-many 关系标准的项目子集。

有人可以提供解决方案来确保 many-to-many 关系尊重 parent 表中的 episode_id 关系吗？

编辑以添加预期行为的伪模型示例

# omitting some detail fields for brevity
e1 = Episode(title="Some Episode")
e2 = Episode(title="Another Episode")
p1 = Persona(name="Raider Dave", episodes=[e1])
p2 = Persona(name="Ricky Spanish", episodes=[e2])
c1 = Clip(title="A clip", episode=e1, personas=[p2])  # should fail
i1 = Image(title="An image", episode=e2, personas=[p1]) # should fail
c2 = Clip(title="Another clip", episode=e1, personas=[p1])  # should succeed
i2 = Image(title="Another image", episode=e2, personas=[p2]) # should succeed

Answer 1

我想不出有什么方法可以在数据库上添加这个逻辑。在您的代码中管理这些约束是否可以接受？像这样：

事件：一张新图片将被插入到数据库中

# create new image with id = 1 and linked episode = 1
my_image = Image(...) 

# my personas
Persona.query.all()
[<Persona('Homer')>, <Persona('Marge')>, <Persona('Pikachu')>]

# my episodes
>>> Episode.query.all()
[<Episode('the simpson')>]

#my images
>>> Image.query.all()
[<Image 1>, <Image 2>]

# personas in first image
>>> Image.query.all()[0].personas
[<Persona('Marge')>]

# which episode?
>>> Image.query.all()[0].episode
<Episode('the simpson')>

# same as above but with next personas (Note that Pikachu is linked to the wrong episode)
>>> Image.query.all()[1].personas
[<Persona('Pikachu')>]
>>> Image.query.all()[1].episode
<Episode('the simpson')>

# before saving the Image object check its episode id, then for that episode get a list of personas that appear.
# Look for your persona id of Image inside this list to see if this persona appear in the episode

my_image.episode.id # return 1

# get a list of persona(s).id that appear in the episode linked to the image!
personas_in_episode = [ persona.id for persona in Episode.query.filter_by(id=1).first().personas ] # return list of id of Persona objects [1,2] (Homer and Marge as episode with id 1 is The Simpson)

my_image_personas = [ persona.id for persona in my_image.personas ] # return a list of id of Persona objects linked in image [3] Persona is Pikachu

>>> my_image_personas
[3]

>>> for persona_id in my_image_personas:
...     if persona_id not in personas_in_episode:
...         print(f"persona with id {persona_id} does not appear in the episode for this image") 
... 
persona with id 3 does not appear in the episode for this image

剪辑也是如此。

Answer 2

我认为你需要修改 personas_images 添加列 episode_id 然后将复合外键添加到 episode_id/personas_id 上的 personas_episode 并将图像外键修改为在 image_id/episode_id 上合成。这确保角色在剧集中并且图像在同一集中。

然后对剪辑执行类似操作。

Answer 3

我查看了您的 ER，发现以下实体的关系存在问题。在进行以下更改后，您的架构将完成。

CREATE TABLE episodes (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(120) NOT NULL UNIQUE,
plot TEXT,
tmdb_id VARCHAR(10) NOT NULL,
tvdb_id VARCHAR(10) NOT NULL,
imdb_id VARCHAR(10) NOT NULL
-- Need relationship as below
);

CREATE TABLE clips (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(100) NOT NULL,
timestamp VARCHAR(7) NOT NULL,
link VARCHAR(100) NOT NULL,
episode_id INT NOT NULL,
FOREIGN KEY(episode_id) REFERENCES episodes(id) -- No need as below
);

CREATE TABLE images (
id INT NOT NULL PRIMARY KEY,
link VARCHAR(120) NOT NULL UNIQUE,
path VARCHAR(120) NOT NULL UNIQUE,
episode_id INT NOT NULL,
FOREIGN KEY(episode_id) REFERENCES episodes(id)-- No need as below
);

正确的关系应该如下：

CREATE TABLE episodes (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(120) NOT NULL UNIQUE,
plot TEXT,
tmdb_id VARCHAR(10) NOT NULL,
tvdb_id VARCHAR(10) NOT NULL,
imdb_id VARCHAR(10) NOT NULL,
clips_id VARCHAR(10) NOT NULL,
clips_id INT NOT NULL ,
images_id INT NOT NULL ,
FOREIGN KEY(clips_id) REFERENCES clips(id)
FOREIGN KEY(images_id) REFERENCES images(id)
);

CREATE TABLE clips (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(100) NOT NULL,
timestamp VARCHAR(7) NOT NULL,
link VARCHAR(100) NOT NULL,
episode_id INT NOT NULL
);

CREATE TABLE images (
id INT NOT NULL PRIMARY KEY,
link VARCHAR(120) NOT NULL UNIQUE,
path VARCHAR(120) NOT NULL UNIQUE,
episode_id INT NOT NULL
);

Answer 4

添加：

不可为 null 的列 episode_id、
引用 personas_episode 和
自动填充触发器 episode_id。

不可为空的列和复合外键足以在数据库级别产生正确的约束，并确保只能在 SQLAlchemy 模型之外添加正确的数据。

由于 SQLAlchemy 模型不支持拦截 relationship.secondary 中引用的 Table 的 before_insert 事件，因此提出了触发器。

实施

SQLite 不支持在 BEFORE INSERT 触发器中修改 NEW.episode_id，这意味着我们必须在 AFTER INSERT 触发器中自动填充。因此，我们允许列为 nullable 并添加 2 个触发器以稍后检查 episode_id 约束。

episode_id_nullable = db.engine.dialect.name == "sqlite"                # Add this

personas_clips = db.Table(
    "personas_clips",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("episode_id", db.Integer, nullable=episode_id_nullable),  # Add this
    db.Column("clip_id", db.ForeignKey("clips.id"), primary_key=True),
    db.ForeignKeyConstraint(["persona_id", "episode_id"], ["personas_episodes.persona_id", "personas_episodes.episode_id"]),  # Add this
)

personas_images = db.Table(
    "personas_images",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("episode_id", db.Integer, nullable=episode_id_nullable),  # Add this
    db.Column("image_id", db.ForeignKey("images.id"), primary_key=True),
    db.ForeignKeyConstraint(["persona_id", "episode_id"], ["personas_episodes.persona_id", "personas_episodes.episode_id"]),  # Add this
)

SQLite 触发器：

插入前，检查 clip_id/image_id 在 episode 中引用 clip/image，其中 persona 在（基于 persona_episodes）。
更新前，请检查 episode_id 是否未设置为 NULL。
插入后，自动填充 episode_id。

SQLITE_CHECK_EPISODE_ID_BEFORE_INSERT = """
CREATE TRIGGER {table_name}_check_episode_id_before_insert BEFORE INSERT ON {table_name}
  FOR EACH ROW
  WHEN NEW.episode_id IS NULL
  BEGIN
    SELECT RAISE(ABORT, 'NOT NULL constraint failed: {table_name}.episode_id') WHERE NOT EXISTS (
        SELECT 1
        FROM {fk_target_table_name}
        JOIN personas_episodes ON {fk_target_table_name}.episode_id = personas_episodes.episode_id
        WHERE {fk_target_table_name}.{fk_target_name} = NEW.{fk_name}
          AND personas_episodes.persona_id = NEW.persona_id
    );
  END;
"""

SQLITE_CHECK_EPISODE_ID_BEFORE_UPDATE = """
CREATE TRIGGER {table_name}_check_episode_id_before_update BEFORE UPDATE ON {table_name}
  FOR EACH ROW
  WHEN NEW.episode_id IS NULL
  BEGIN
    SELECT RAISE(ABORT, 'NOT NULL constraint failed: {table_name}.episode_id');
  END;
"""

SQLITE_AUTOFILL_EPISODE_ID = """
CREATE TRIGGER {table_name}_autofill_episode_id AFTER INSERT ON {table_name}
  FOR EACH ROW
  WHEN NEW.episode_id IS NULL
  BEGIN
    UPDATE {table_name}
    SET episode_id = (SELECT {fk_target_table_name}.episode_id
        FROM {fk_target_table_name}
        JOIN personas_episodes ON {fk_target_table_name}.episode_id = personas_episodes.episode_id
        WHERE {fk_target_table_name}.{fk_target_name} = NEW.{fk_name}
          AND personas_episodes.persona_id = NEW.persona_id)
    WHERE {fk_name} = NEW.{fk_name}
      AND persona_id = NEW.persona_id;
  END;
"""

PostgreSQL 触发器：

插入前，自动填充 episode_id。

POSTGRESQL_AUTOFILL_EPISODE_ID = """
CREATE OR REPLACE FUNCTION {table_name}_autofill_episode_id() RETURNS TRIGGER AS ${table_name}_autofill_episode_id$
  DECLARE
    _episode_id INT;
    in_episode BOOL;
  BEGIN
    IF NEW.episode_id IS NULL THEN
        SELECT episode_id INTO _episode_id FROM {fk_target_table_name} WHERE {fk_target_name} = NEW.{fk_name};
        SELECT TRUE INTO in_episode FROM personas_episodes WHERE persona_id = NEW.persona_id AND episode_id = _episode_id;
        IF in_episode IS NOT NULL THEN
            NEW.episode_id = _episode_id;
        END IF;
    END IF;
    RETURN NEW;
  END;
${table_name}_autofill_episode_id$ LANGUAGE plpgsql;

CREATE TRIGGER {table_name}_autofill_episode_id BEFORE INSERT OR UPDATE ON {table_name}
  FOR EACH ROW EXECUTE PROCEDURE {table_name}_autofill_episode_id();
"""

添加触发器 after_create 表 personas_clips 和 personas_images:

from sqlalchemy import event, text


def after_create_trigger_autofill_episode_id(target, connection, **kw):
    fk = next(fk for fk in target.foreign_keys if "personas" not in fk.column.table.name)
    if connection.dialect.name == "sqlite":
        connection.execute(text(SQLITE_CHECK_EPISODE_ID_BEFORE_INSERT.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name,fk_name=fk.parent.name)))
        connection.execute(text(SQLITE_CHECK_EPISODE_ID_BEFORE_UPDATE.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name, fk_name=fk.parent.name)))
        connection.execute(text(SQLITE_AUTOFILL_EPISODE_ID.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name, fk_name=fk.parent.name)))
    elif connection.dialect.name == "postgresql":
        connection.execute(text(POSTGRESQL_AUTOFILL_EPISODE_ID.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name, fk_name=fk.parent.name)))


event.listen(personas_clips, "after_create", after_create_trigger_autofill_episode_id)
event.listen(personas_images, "after_create", after_create_trigger_autofill_episode_id)

测试用例

这是我目前根据问题中的预期行为得出的结果。

from sqlalchemy.exc import IntegrityError
from sqlalchemy.sql import select

from models import *

if db.engine.dialect.name == "sqlite":
    db.session.execute("pragma foreign_keys=on")
else:
    db.session.execute("""
DROP TABLE IF EXISTS episodes CASCADE;
DROP TABLE IF EXISTS personas CASCADE;
DROP TABLE IF EXISTS personas_episodes CASCADE;
DROP TABLE IF EXISTS clips CASCADE;
DROP TABLE IF EXISTS personas_clips;
DROP TABLE IF EXISTS images CASCADE;
DROP TABLE IF EXISTS personas_images;
""")
    db.session.commit()

db.create_all()

e1 = Episode(title="Some Episode")
e2 = Episode(title="Another Episode")
db.session.add(e1)
db.session.add(e2)
db.session.commit()

p1 = Persona(name="Raider Dave", episodes=[e1])
p2 = Persona(name="Ricky Spanish", episodes=[e2])
db.session.add(p1)
db.session.add(p2)
db.session.commit()

c1 = Clip(title="A clip", episode=e1, personas=[p2])         # should fail
db.session.add(c1)
try:
    db.session.commit()
    assert False
except IntegrityError:
    db.session.rollback()
assert Clip.query.first() is None, list(db.session.execute(select(personas_clips)))

i1 = Image(link="An image", episode=e2, personas=[p1])       # should fail
db.session.add(i1)
try:
    db.session.commit()
    assert False
except IntegrityError:
    db.session.rollback()
assert Image.query.first() is None, list(db.session.execute(select(personas_images)))

c2 = Clip(title="Another clip", episode=e1, personas=[p1])   # should succeed
db.session.add(c2)
db.session.commit()
assert Clip.query.first() is not None

i2 = Image(link="Another image", episode=e2, personas=[p2])  # should succeed
db.session.add(i2)
db.session.commit()
assert Image.query.first() is not None

没有成功的备选方案

SQLAlchemy 似乎不支持 Table 的 before_insert 事件，仅支持 Model.
https://docs.sqlalchemy.org/en/14/orm/events.html#sqlalchemy.orm.MapperEvents.before_insert

我尝试使用 Association Proxy，但无法完全支持 c2.personas.remove(p1)。 https://docs.sqlalchemy.org/en/14/orm/extensions/associationproxy.html

SQLAlchemy - 将 ForeignKeyConstraint 添加到基于另一个关系的 many-to-many table

SQLAlchemy - Adding a ForeignKeyConstraint to a many-to-many table that is based on another relationship

python

sql

实施

测试用例

没有成功的备选方案