从重复项中获取 bool 为 true 的那一条,如果没有则回退到其他项

Get one from duplicates where bool is true or fallback if none

我有以下数据:

id | name  | some_value | active
--------------------------------
 1 | test  | 1234       | false
 2 | toast | qwert      | false
 3 | test  | 4321       | true
 4 | toast | trewq      | false

我想要一个列表,其中每个名称都是“唯一的”。此外,对于每个名称,我想获得 active 为真的那一条;如果没有 active 为真的记录,则返回数据库中该名称的最后一条记录。

目前我有:

#!/usr/bin/env python3
import re
from sqlalchemy import create_engine, Column, Integer, String, Boolean, and_, or_
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import func
from sqlalchemy.ext.declarative import as_declarative, declared_attr


# Zero-width match at the position before every uppercase letter except one at
# the very start of the string; used to split CamelCase names into words.
PATTERN = re.compile(r"(?<!^)(?=[A-Z])")
# In-memory SQLite database; echo=True asks SQLAlchemy to log the SQL it emits.
engine = create_engine('sqlite:///:memory:', echo=True)


@as_declarative()
class Base:
    """Declarative base class that derives each table name from the class name."""

    # Bare name expressions: they are evaluated and discarded, assigning nothing.
    # NOTE(review): these look like placeholders for attribute declarations
    # (e.g. type annotations) -- confirm against the intended original.
    id
    __name__

    # Generate __tablename__ automatically
    @declared_attr
    def __tablename__(cls) -> str:
        """Return the class name in snake_case (e.g. ``CrudData`` -> ``crud_data``)."""
        return PATTERN.sub("_", cls.__name__).lower()


class Data(Base):
    """Mapped row holding a name, a string value and an ``active`` flag."""

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    some_value = Column(String, nullable=False)
    # Rows are inactive unless explicitly created with active=True.
    active = Column(Boolean, nullable=False, default=False)

    def __repr__(self):
        """Return the row as ``id | name | some_value | active``."""
        fields = (self.id, self.name, self.some_value, self.active)
        return " | ".join(f"{field}" for field in fields)

    def __str__(self):
        """Return the same pipe-separated text that ``__repr__`` produces."""
        return "{} | {} | {} | {}".format(
            self.id, self.name, self.some_value, self.active
        )


class CrudData:
    """Query helper bound to a single mapped model class."""

    def __init__(self, model):
        """Store the mapped class that the queries of this helper target."""
        self.model = model

    def get_auto_all_active(self, db):
        """Return model rows that are the max-id row of their name OR are active.

        A subquery computes the highest ``id`` per ``name``. The model is then
        joined to it on "same name and id equals that maximum" OR "row is
        active". The ``active`` test is not tied to the subquery row, so every
        active row satisfies the join condition in addition to each name's
        max-id row.

        Args:
            db: session object exposing ``query``.

        Returns:
            The list produced by ``Query.all()``.
        """
        entity = self.model

        # Per-name aggregate: the highest id inside each name group.
        newest_per_name = (
            db.query(entity.name, func.max(entity.id).label("maxid"))
            .group_by(entity.name)
            .subquery("groupedjob")
        )

        is_newest_of_name = and_(
            entity.name == newest_per_name.c.name,
            entity.id == newest_per_name.c.maxid,
        )
        # `== True` is deliberate: it builds a SQL expression, not a Python bool.
        is_active = entity.active == True

        joined = db.query(entity).join(
            newest_per_name,
            or_(is_newest_of_name, is_active),
        )
        return joined.all()


if __name__ == "__main__":
    # Create the tables for every model registered on Base, then open a session.
    Base.metadata.create_all(engine)
    db = sessionmaker(bind=engine)()

    # Sample rows: two names, each present twice; only the first "test" row
    # is active. They are added in exactly this order.
    sample_rows = [
        Data(name="test", some_value="4321", active=True),
        Data(name="toast", some_value="qwert"),
        Data(name="test", some_value="1234"),
        Data(name="toast", some_value="trewq"),
    ]
    db.add_all(sample_rows)
    db.commit()

    # Print every row selected by the query under discussion.
    for data in CrudData(Data).get_auto_all_active(db):
        print(data)

这导致:

1 | test | 4321 | True
3 | test | 1234 | False
4 | toast | trewq | False

但是我想要:

1 | test | 4321 | True
4 | toast | trewq | False

我该如何解决这个问题?我已经花了大约 1 小时尝试解决它。非常感谢任何帮助。

编辑:使脚本完全可执行

在无法访问您的数据库的情况下,这个问题很难直接解决,但把数据取出后用 pandas 处理的过程可能如下所示:

示例数据:

import pandas as pd
# Sample frame mirroring the table from the question: one list per column,
# with the explicit integer row labels 0..3.
df = pd.DataFrame(
    {
        'id': [1, 2, 3, 4],
        'name': ['test', 'toast', 'test', 'toast'],
        'some_value': ['1234', 'qwert', '4321', 'trewq'],
        'active': [False, False, True, False],
    },
    index=[0, 1, 2, 3],
)

代码:

# One chained pipeline:
#   1. keep the last row of every (name, active) pair,
#   2. sort so active=True rows come first,
#   3. keep the first remaining row per name,
#   4. use id as the index.
df = (
    df.groupby(['name', 'active'], as_index=False)[['id', 'some_value']]
    .last()
    .sort_values(by=['active'], ascending=False)
    .drop_duplicates(subset=['name'])
    .set_index('id')
)

#      name  active some_value
# id                          
# 3    test    True       4321
# 4   toast   False      trewq

我认为使用排序/排名(ordering/ranking)可以解决这个问题:

# Fragment meant to run inside a method (it uses `self`, `db` and `return`).
# Rank the rows within each name: active descending first, then id descending,
# so an active row (if any) is intended to rank first, otherwise the highest id.
expr = func.rank().over(partition_by=self.model.name, order_by=[self.model.active.desc(), self.model.id.desc()]).label("therank")
# Subquery pairing every row id with its rank inside its name group.
subq = db.query(self.model.id, expr).subquery("subq")
# Join the model back on id and keep only the top-ranked row of each name.
q = db.query(self.model).join(subq, self.model.id == subq.c.id).filter(subq.c.therank == 1)
return q.all()