从重复项中获取布尔值为 true 的一项;如果没有,则使用回退项
Get one from duplicates where bool is true or fallback if none
我有以下数据:
id | name | some_value | active
--------------------------------
1 | test | 1234 | false
2 | toast | qwert | false
3 | test | 4321 | true
4 | toast | trewq | false
我想要一个列表,其中每个名称都是“唯一的”。此外,对于每个名称,我想获得 active
为真的那一项;如果没有 active 为真的项,则返回数据库中该名称的最后一项。
目前我有:
#!/usr/bin/env python3
import re
from sqlalchemy import create_engine, Column, Integer, String, Boolean, and_, or_
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import func
from sqlalchemy.ext.declarative import as_declarative, declared_attr
# Zero-width match before every uppercase letter except one at the very start
# of the string; used below to turn CamelCase class names into snake_case.
PATTERN = re.compile(r"(?<!^)(?=[A-Z])")
# In-memory SQLite database; echo=True makes SQLAlchemy log the SQL it emits.
engine = create_engine('sqlite:///:memory:', echo=True)
@as_declarative()
class Base:
    """Declarative base that names each table after its class.

    Subclasses get ``__tablename__`` computed for them: the class name is
    split in front of every inner uppercase letter and lower-cased.
    """

    # NOTE(review): these bare names are evaluated and discarded. They look
    # like annotations that lost their types (e.g. ``id: Any``) -- confirm
    # against the original script.
    id
    __name__

    @declared_attr
    def __tablename__(cls) -> str:
        """Return the snake_case form of the class name."""
        class_name = cls.__name__
        with_underscores = PATTERN.sub("_", class_name)
        return with_underscores.lower()
class Data(Base):
    """Mapped row with a name, a free-form value and an ``active`` flag."""

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    some_value = Column(String, nullable=False)
    # Rows are inactive unless ``active`` is passed explicitly.
    active = Column(Boolean, nullable=False, default=False)

    def _as_row(self):
        """Render the four columns separated by `` | ``."""
        fields = (self.id, self.name, self.some_value, self.active)
        return " | ".join(str(field) for field in fields)

    def __repr__(self):
        """Return the pipe-separated column values."""
        return self._as_row()

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self._as_row()
class CrudData:
    """Query helper bound to a single mapped model class."""

    def __init__(self, model):
        """Remember the mapped class whose rows will be queried."""
        self.model = model

    def get_auto_all_active(self, db):
        """Return rows that are active or hold the highest id of their name.

        NOTE: the join condition is ``(same name AND id == max id) OR active``.
        The highest-id row of a name is therefore returned even when that
        name also has an active row, so a name can appear more than once.

        :param db: session used to build and run the query.
        :return: list of model instances.
        """
        model = self.model

        # Highest id per name.
        newest = (
            db.query(model.name, func.max(model.id).label("maxid"))
            .group_by(model.name)
            .subquery("groupedjob")
        )

        is_newest_of_name = and_(
            model.name == newest.c.name,
            model.id == newest.c.maxid,
        )
        # ``== True`` builds a SQL comparison; it is not a Python truth test.
        is_active = model.active == True  # noqa: E712

        joined = db.query(model).join(newest, or_(is_newest_of_name, is_active))
        return joined.all()
if __name__ == "__main__":
    # Create the tables on the engine and open a session.
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    db = Session()

    # Sample rows; ``active`` uses the column default when it is omitted.
    sample_rows = [
        Data(name="test", some_value="4321", active=True),
        Data(name="toast", some_value="qwert"),
        Data(name="test", some_value="1234"),
        Data(name="toast", some_value="trewq"),
    ]
    db.add_all(sample_rows)
    db.commit()

    # Print every row the helper selects.
    crud = CrudData(Data)
    for row in crud.get_auto_all_active(db):
        print(row)
这导致:
1 | test | 4321 | True
3 | test | 1234 | False
4 | toast | trewq | False
但是我想要:
1 | test | 4321 | True
4 | toast | trewq | False
我该如何解决这个问题?我已经花了大约 1 小时尝试解决它。非常感谢任何帮助。
编辑:使脚本完全可执行
由于无法访问您的数据库,这个问题很难直接解决,但把处理转移到 pandas 中的过程可能如下所示:
示例数据:
import pandas as pd
# Column values in row order; each column is turned into an
# {index: value} mapping before the frame is built.
_columns = {
    'id': [1, 2, 3, 4],
    'name': ['test', 'toast', 'test', 'toast'],
    'some_value': ['1234', 'qwert', '4321', 'trewq'],
    'active': [False, False, True, False],
}
df = pd.DataFrame(
    {column: dict(enumerate(values)) for column, values in _columns.items()}
)
代码:
# Keep the last row of every (name, active) pair, put active rows first,
# keep the first remaining row per name, then index the result by id.
df = (
    df.groupby(['name', 'active'], as_index=False)[['id', 'some_value']]
    .last()
    .sort_values(by=['active'], ascending=False)
    .drop_duplicates(subset=['name'])
    .set_index('id')
)
# Resulting frame:
#      name  active some_value
# id
# 3    test    True       4321
# 4   toast   False      trewq
我认为使用排序/排名(ordering/ranking)可以解决这个问题:
# Rank the rows within each name: active rows first, then highest id first.
expr = func.rank().over(partition_by=self.model.name, order_by=[self.model.active.desc(), self.model.id.desc()]).label("therank")
# Pair every row id with its rank inside its name partition.
subq = db.query(self.model.id, expr).subquery("subq")
# Join back to the model on id and keep only the top-ranked row of each name.
q = db.query(self.model).join(subq, self.model.id == subq.c.id).filter(subq.c.therank == 1)
return q.all()
我有以下数据:
id | name | some_value | active
--------------------------------
1 | test | 1234 | false
2 | toast | qwert | false
3 | test | 4321 | true
4 | toast | trewq | false
我想要一个列表,其中每个名称都是“唯一的”。此外,对于每个名称,我想获得 active
为真的那一项;如果没有 active 为真的项,则返回数据库中该名称的最后一项。
目前我有:
#!/usr/bin/env python3
import re
from sqlalchemy import create_engine, Column, Integer, String, Boolean, and_, or_
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import func
from sqlalchemy.ext.declarative import as_declarative, declared_attr
# Zero-width match before every uppercase letter except one at the very start
# of the string; used below to turn CamelCase class names into snake_case.
PATTERN = re.compile(r"(?<!^)(?=[A-Z])")
# In-memory SQLite database; echo=True makes SQLAlchemy log the SQL it emits.
engine = create_engine('sqlite:///:memory:', echo=True)
@as_declarative()
class Base:
    """Declarative base that names each table after its class.

    Subclasses get ``__tablename__`` computed for them: the class name is
    split in front of every inner uppercase letter and lower-cased.
    """

    # NOTE(review): these bare names are evaluated and discarded. They look
    # like annotations that lost their types (e.g. ``id: Any``) -- confirm
    # against the original script.
    id
    __name__

    @declared_attr
    def __tablename__(cls) -> str:
        """Return the snake_case form of the class name."""
        class_name = cls.__name__
        with_underscores = PATTERN.sub("_", class_name)
        return with_underscores.lower()
class Data(Base):
    """Mapped row with a name, a free-form value and an ``active`` flag."""

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    some_value = Column(String, nullable=False)
    # Rows are inactive unless ``active`` is passed explicitly.
    active = Column(Boolean, nullable=False, default=False)

    def _as_row(self):
        """Render the four columns separated by `` | ``."""
        fields = (self.id, self.name, self.some_value, self.active)
        return " | ".join(str(field) for field in fields)

    def __repr__(self):
        """Return the pipe-separated column values."""
        return self._as_row()

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self._as_row()
class CrudData:
    """Query helper bound to a single mapped model class."""

    def __init__(self, model):
        """Remember the mapped class whose rows will be queried."""
        self.model = model

    def get_auto_all_active(self, db):
        """Return rows that are active or hold the highest id of their name.

        NOTE: the join condition is ``(same name AND id == max id) OR active``.
        The highest-id row of a name is therefore returned even when that
        name also has an active row, so a name can appear more than once.

        :param db: session used to build and run the query.
        :return: list of model instances.
        """
        model = self.model

        # Highest id per name.
        newest = (
            db.query(model.name, func.max(model.id).label("maxid"))
            .group_by(model.name)
            .subquery("groupedjob")
        )

        is_newest_of_name = and_(
            model.name == newest.c.name,
            model.id == newest.c.maxid,
        )
        # ``== True`` builds a SQL comparison; it is not a Python truth test.
        is_active = model.active == True  # noqa: E712

        joined = db.query(model).join(newest, or_(is_newest_of_name, is_active))
        return joined.all()
if __name__ == "__main__":
    # Create the tables on the engine and open a session.
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    db = Session()

    # Sample rows; ``active`` uses the column default when it is omitted.
    sample_rows = [
        Data(name="test", some_value="4321", active=True),
        Data(name="toast", some_value="qwert"),
        Data(name="test", some_value="1234"),
        Data(name="toast", some_value="trewq"),
    ]
    db.add_all(sample_rows)
    db.commit()

    # Print every row the helper selects.
    crud = CrudData(Data)
    for row in crud.get_auto_all_active(db):
        print(row)
这导致:
1 | test | 4321 | True
3 | test | 1234 | False
4 | toast | trewq | False
但是我想要:
1 | test | 4321 | True
4 | toast | trewq | False
我该如何解决这个问题?我已经花了大约 1 小时尝试解决它。非常感谢任何帮助。
编辑:使脚本完全可执行
由于无法访问您的数据库,这个问题很难直接解决,但把处理转移到 pandas 中的过程可能如下所示:
示例数据:
import pandas as pd
# Column values in row order; each column is turned into an
# {index: value} mapping before the frame is built.
_columns = {
    'id': [1, 2, 3, 4],
    'name': ['test', 'toast', 'test', 'toast'],
    'some_value': ['1234', 'qwert', '4321', 'trewq'],
    'active': [False, False, True, False],
}
df = pd.DataFrame(
    {column: dict(enumerate(values)) for column, values in _columns.items()}
)
代码:
# Keep the last row of every (name, active) pair, put active rows first,
# keep the first remaining row per name, then index the result by id.
df = (
    df.groupby(['name', 'active'], as_index=False)[['id', 'some_value']]
    .last()
    .sort_values(by=['active'], ascending=False)
    .drop_duplicates(subset=['name'])
    .set_index('id')
)
# Resulting frame:
#      name  active some_value
# id
# 3    test    True       4321
# 4   toast   False      trewq
我认为使用排序/排名(ordering/ranking)可以解决这个问题:
# Rank the rows within each name: active rows first, then highest id first.
expr = func.rank().over(partition_by=self.model.name, order_by=[self.model.active.desc(), self.model.id.desc()]).label("therank")
# Pair every row id with its rank inside its name partition.
subq = db.query(self.model.id, expr).subquery("subq")
# Join back to the model on id and keep only the top-ranked row of each name.
q = db.query(self.model).join(subq, self.model.id == subq.c.id).filter(subq.c.therank == 1)
return q.all()