在 SQL 中计算校验和
Calculating a checksum in SQL
我有一个巴西 CPF 号码的数据库字段,想检查它们的有效性。这些是 11 位数字字符串,即 9 位数字和 2 位校验和数字。
我目前在 MS Excel 中实现了校验和(见下文),但我想找出一种在 SQL 中实现的方法。
校验和的工作原理如下:(坚持住,这太疯狂了。)
- CPF号码以ABCDEFGHI / JK的形式写成或直接写成
ABCDEFGHIJK,其中数字不能全部相同。
- J 称为 CPF 号码的第一位数字检查。
- K被称为CPF号的第二个校验位。
第一个数字 (J):
前9位的每一位都乘以一个常数:
10*A + 9*B + 8*C + 7*D + 6*E + 5*F + 4*G + 3*H + 2*I
将此和除以11,余数为0或1则J为0,余数>=2则J为11 - remainder
.
第二个数字 (K):(相同的计算但包括数字 J)
前10位的每一位乘以一个常数:
11*A + 10*B + 9*C + 8*D + 7*E + 6*F + 5*G + 4*H + 3*I + 2*J
将此和除以11,余数为0或1,则K为0。余数>=2,则K为11 - remainder
。
--在 MS Excel 中的实现--
假设CPF在A2.
欢迎在这里进行优化,但不是这个问题的重点。
数字 J:=IF(MOD(SUM(MID($A2,1,1)*10,MID($A2,2,1)*9,MID($A2,3,1)*8,MID($A2,4,1)*7,MID($A2,5,1)*6,MID($A2,6,1)*5,MID($A2,7,1)*4,MID($A2,8,1)*3,MID($A2,9,1)*2),11)<=1,NUMBERVALUE(LEFT(RIGHT($A2,2),1))=0,NUMBERVALUE(LEFT(RIGHT($A2,2),1))=(11-MOD(SUM(MID($A2,1,1)*10,MID($A2,2,1)*9,MID($A2,3,1)*8,MID($A2,4,1)*7,MID($A2,5,1)*6,MID($A2,6,1)*5,MID($A2,7,1)*4,MID($A2,8,1)*3,MID($A2,9,1)*2),11)))
数字K:
=IF(MOD(SUM(MID($A2,1,1)*11,MID($A2,2,1)*10,MID($A2,3,1)*9,MID($A2,4,1)*8,MID($A2,5,1)*7,MID($A2,6,1)*6,MID($A2,7,1)*5,MID($A2,8,1)*4,MID($A2,9,1)*3,MID($A2,10,1)*2),11)<=1,NUMBERVALUE(LEFT(RIGHT($A2,1),1))=0,NUMBERVALUE(LEFT(RIGHT($A2,1),1))=(11-MOD(SUM(MID($A2,1,1)*11,MID($A2,2,1)*10,MID($A2,3,1)*9,MID($A2,4,1)*8,MID($A2,5,1)*7,MID($A2,6,1)*6,MID($A2,7,1)*5,MID($A2,8,1)*4,MID($A2,9,1)*3,MID($A2,10,1)*2),11)))
假设您有一个 table,其中有一个 id 主键列和一个数据类型为 NUMBER(9,0) 的 cpf 列(只存放前 9 位数字),那么类似于:
-- Derive both CPF check digits (j, k) from the nine base digits held in your_table.cpf.
WITH base_digits AS (
    -- Peel off each decimal digit, most significant (d1) to least significant (d9).
    SELECT id,
           MOD( TRUNC( cpf / 1e8 ), 10 ) AS d1,
           MOD( TRUNC( cpf / 1e7 ), 10 ) AS d2,
           MOD( TRUNC( cpf / 1e6 ), 10 ) AS d3,
           MOD( TRUNC( cpf / 1e5 ), 10 ) AS d4,
           MOD( TRUNC( cpf / 1e4 ), 10 ) AS d5,
           MOD( TRUNC( cpf / 1e3 ), 10 ) AS d6,
           MOD( TRUNC( cpf / 1e2 ), 10 ) AS d7,
           MOD( TRUNC( cpf / 1e1 ), 10 ) AS d8,
           MOD( TRUNC( cpf / 1e0 ), 10 ) AS d9
    FROM your_table
),
weighted AS (
    -- rem_j:     weighted sum (weights 10..2) modulo 11.
    -- partial_k: weighted sum (weights 11..3); the 2*j term is added once j is known.
    SELECT id,
           MOD( 10*d1 + 9*d2 + 8*d3 + 7*d4 + 6*d5 + 5*d6 + 4*d7 + 3*d8 + 2*d9, 11 ) AS rem_j,
           11*d1 + 10*d2 + 9*d3 + 8*d4 + 7*d5 + 6*d6 + 5*d7 + 4*d8 + 3*d9 AS partial_k
    FROM base_digits
),
first_digit AS (
    -- Remainder 0 or 1 maps to 0, anything larger maps to 11 - remainder.
    SELECT id,
           CASE WHEN rem_j > 1 THEN 11 - rem_j
                WHEN rem_j <= 1 THEN 0
           END AS j,
           partial_k
    FROM weighted
),
second_digit AS (
    SELECT id,
           j,
           MOD( partial_k + 2 * j, 11 ) AS rem_k
    FROM first_digit
)
SELECT id,
       j,
       CASE WHEN rem_k > 1 THEN 11 - rem_k
            WHEN rem_k <= 1 THEN 0
       END AS k
FROM second_digit
我的测试table:
-- Create a table called CPF
CREATE TABLE CPF(Id integer PRIMARY KEY, No integer);
-- Create few records in this table
-- (explicit column list so the insert does not depend on column order)
INSERT INTO CPF (Id, No) VALUES (1, 12345678901);
我的嵌套 query:
-- Validate both CPF check digits for every row of table CPF.
-- Output columns: No, Jchk ('J ok!' / 'J wrong!'), Kchk ('K ok!' / 'K wrong!').
WITH padded AS (
    -- No is numeric, so leading zeros are lost; restore the 11-character form.
    -- Values outside 0..99999999999 cannot be CPFs and become NULL here
    -- (LPAD would otherwise silently truncate longer strings to 11 characters).
    SELECT No,
           CASE WHEN No BETWEEN 0 AND 99999999999
                THEN LPAD(TO_CHAR(No), 11, '0')
           END AS digits
    FROM CPF
),
split AS (
    -- Explicit TO_NUMBER instead of relying on implicit string-to-number conversion.
    SELECT No,
           TO_NUMBER(SUBSTR(digits,  1, 1)) AS A,
           TO_NUMBER(SUBSTR(digits,  2, 1)) AS B,
           TO_NUMBER(SUBSTR(digits,  3, 1)) AS C,
           TO_NUMBER(SUBSTR(digits,  4, 1)) AS D,
           TO_NUMBER(SUBSTR(digits,  5, 1)) AS E,
           TO_NUMBER(SUBSTR(digits,  6, 1)) AS F,
           TO_NUMBER(SUBSTR(digits,  7, 1)) AS G,
           TO_NUMBER(SUBSTR(digits,  8, 1)) AS H,
           TO_NUMBER(SUBSTR(digits,  9, 1)) AS I,
           TO_NUMBER(SUBSTR(digits, 10, 1)) AS J,
           TO_NUMBER(SUBSTR(digits, 11, 1)) AS K
    FROM padded
),
remainders AS (
    -- MK uses the stored J digit, as in the original query.
    SELECT No, J, K,
           MOD(10*A + 9*B + 8*C + 7*D + 6*E + 5*F + 4*G + 3*H + 2*I, 11) AS MJ,
           MOD(11*A + 10*B + 9*C + 8*D + 7*E + 6*F + 5*G + 4*H + 3*I + 2*J, 11) AS MK
    FROM split
),
expected AS (
    -- Remainder 0 or 1 gives check digit 0, otherwise 11 - remainder.
    SELECT No, J, K,
           CASE WHEN MJ < 2 THEN 0 ELSE 11 - MJ END AS J2,
           CASE WHEN MK < 2 THEN 0 ELSE 11 - MK END AS K2
    FROM remainders
)
-- Test for equality so that a NULL on either side is reported as wrong,
-- not silently passed as ok.
SELECT No,
       CASE WHEN J = J2 THEN 'J ok!' ELSE 'J wrong!' END AS Jchk,
       CASE WHEN K = K2 THEN 'K ok!' ELSE 'K wrong!' END AS Kchk
FROM expected
;
@SAR622:很好的问题,感谢算法。
这里有一个针对 SQL Server 的 T-SQL 解决方案,以防万一。请注意,Cadastro de Pessoas Físicas (CPF) 号码只能有 11 位数字(不足 11 位时在前面补零),也就是说它们不能超过 10^11-1(即 99999999999)。如果您在数据集中发现 14 位的号码,它们很可能是发给企业的 Cadastro Nacional da Pessoa Jurídica (CNPJ) 号码(或者是输入错误或其他情况)。虚假的 CPF 和 CNPJ 号码可以在此处(批量)生成并(逐个)验证。此外,该网站提供了有关由其 CNPJ 定位的企业的更多信息(将其视为隐式 CNPJ 验证)。验证 CPF 编号时,请记住检查该编号是否在 [0, 10^11-1] 范围内。您可能需要删除任何标点符号和其他无效字符(作为用户,我们往往会打错字)。
此输入 table 具有前 5 个无效 CPF 编号和后 4 个有效编号:
-- Rebuild the scratch table of sample CPF values from a clean slate.
IF OBJECT_ID('tempdb..#x') IS NOT NULL
    DROP TABLE #x;

CREATE TABLE #x (CPF BIGINT DEFAULT NULL);

-- One multi-row insert in place of nine single-row statements.
INSERT INTO #x (CPF)
VALUES (12345678900),
       (11),
       (1010101010101010),
       (11111179011525590),
       (-32081397641),
       (00000008726210061),
       (56000608314),
       (73570630706),
       (93957133564);
下面的 t-SQL 函数模块化了实现,但可能比后面的原始 t-SQL 慢。或者,您可以使用 TABLE input/output 或存储过程创建 t-SQL 函数。
-- fnIsCPF: returns 1 when @n is a valid Brazilian CPF number, otherwise 0.
--   @n  CPF as an integer; leading zeros are implied (8726210061 means 08726210061).
-- A CPF is ABCDEFGHIJK: nine base digits followed by check digits J and K.
--   J = 11 - (10A + 9B + ... + 2I) mod 11, replaced by 0 when the remainder is 0 or 1
--   K = 11 - (11A + 10B + ... + 3I + 2J) mod 11, with the same replacement rule
-- NULL, non-positive and 12+-digit inputs return 0, and so do numbers whose
-- eleven digits are all equal (they satisfy the checksum but are not valid CPFs).
-- CREATE OR ALTER (SQL Server 2016 SP1 and later) lets the script run whether or
-- not the function already exists; dbo. matches how the function is called.
CREATE OR ALTER FUNCTION dbo.fnIsCPF(@n BIGINT) RETURNS INT AS
BEGIN
    -- Reject out-of-range input first. @n % 11111111111 = 0 matches exactly the
    -- repdigits 11111111111 .. 99999999999 within the accepted range.
    IF (@n IS NULL OR @n <= 0 OR @n >= 100000000000 OR @n % 11111111111 = 0)
        RETURN 0;

    -- Parse out the nine base digits. FLOOR is kept because a literal above the
    -- INT range (10000000000) is typed as decimal, so that division is not integral.
    DECLARE @a TINYINT = FLOOR(@n / 10000000000) % 10;
    DECLARE @b TINYINT = FLOOR(@n / 1000000000) % 10;
    DECLARE @c TINYINT = FLOOR(@n / 100000000) % 10;
    DECLARE @d TINYINT = FLOOR(@n / 10000000) % 10;
    DECLARE @e TINYINT = FLOOR(@n / 1000000) % 10;
    DECLARE @f TINYINT = FLOOR(@n / 100000) % 10;
    DECLARE @g TINYINT = FLOOR(@n / 10000) % 10;
    DECLARE @h TINYINT = FLOOR(@n / 1000) % 10;
    DECLARE @i TINYINT = FLOOR(@n / 100) % 10;

    -- 11 - remainder is 11 (remainder 0) or 10 (remainder 1) exactly when the
    -- check digit must be 0; the nested NULLIFs turn both into NULL, ISNULL into 0.
    DECLARE @j TINYINT = ISNULL(NULLIF(NULLIF(11 - (10*@a + 9*@b + 8*@c + 7*@d + 6*@e + 5*@f + 4*@g + 3*@h + 2*@i) % 11, 11), 10), 0);
    DECLARE @k TINYINT = ISNULL(NULLIF(NULLIF(11 - (11*@a + 10*@b + 9*@c + 8*@d + 7*@e + 6*@f + 5*@g + 4*@h + 3*@i + 2*@j) % 11, 11), 10), 0);

    -- Compare the computed check digits with the last two digits of @n.
    RETURN CASE WHEN @j = FLOOR(@n / 10) % 10 AND @k = @n % 10 THEN 1 ELSE 0 END;
END;
输出为:
-- Run the validator over every sample row.
SELECT CPF,
       dbo.fnIsCPF(CPF) AS isValid
FROM #x
CPF isValid
12345678900 0
11 0
1010101010101010 0
11111179011525590 0
-32081397641 0
8726210061 1
56000608314 1
73570630706 1
93957133564 1
t-SQL 对于 table:
-- Set-based CPF validation over #x; yields (CPF, isValid) with isValid = 1 or 0.
WITH digits ( CPF, a, b, c, d, e, f, g, h, i ) AS (
    -- Nine base digits, most significant first (leading zeros implied).
    SELECT CPF,
           FLOOR( CPF / 10000000000) % 10,
           FLOOR( CPF / 1000000000) % 10,
           FLOOR( CPF / 100000000) % 10,
           FLOOR( CPF / 10000000) % 10,
           FLOOR( CPF / 1000000) % 10,
           FLOOR( CPF / 100000) % 10,
           FLOOR( CPF / 10000) % 10,
           FLOOR( CPF / 1000) % 10,
           FLOOR( CPF / 100) % 10
    FROM #x
),
jk ( CPF, j, k ) AS (
    -- j: first check digit (11 - remainder, with 10 and 11 mapped to 0).
    -- k: partial weighted sum for the second digit; 2*j is added in the next step.
    SELECT CPF,
           ISNULL(NULLIF(NULLIF(11 - (10*a + 9*b + 8*c + 7*d + 6*e + 5*f + 4*g + 3*h + 2*i) % 11, 11), 10), 0),
           11*a + 10*b + 9*c + 8*d + 7*e + 6*f + 5*g + 4*h + 3*i
    FROM digits
),
jk2 ( CPF, j, k ) AS (
    SELECT CPF,
           j,
           ISNULL(NULLIF(NULLIF(11 - (k + 2 * j) % 11, 11), 10), 0)
    FROM jk
)
-- Valid only if the value is a positive number of at most 11 digits, is not a
-- repdigit (11111111111 .. 99999999999 are multiples of 11111111111 and pass the
-- checksum but are not valid CPFs), and both stored check digits match.
SELECT CPF,
       isValid = CASE WHEN CPF > 0
                       AND CPF < 100000000000
                       AND CPF % 11111111111 <> 0
                       AND j = FLOOR( CPF / 10) % 10
                       AND k = CPF % 10
                      THEN 1 ELSE 0 END
FROM jk2
产生相同的输出。
我有一个巴西 CPF 号码的数据库字段,想检查它们的有效性。这些是 11 位数字字符串,即 9 位数字和 2 位校验和数字。
我目前在 MS Excel 中实现了校验和(见下文),但我想找出一种在 SQL 中实现的方法。
校验和的工作原理如下:(坚持住,这太疯狂了。)
- CPF号码以ABCDEFGHI / JK的形式写成或直接写成 ABCDEFGHIJK,其中数字不能全部相同。
- J 称为 CPF 号码的第一位数字检查。
- K被称为CPF号的第二个校验位。
第一个数字 (J):
前9位的每一位都乘以一个常数:
10*A + 9*B + 8*C + 7*D + 6*E + 5*F + 4*G + 3*H + 2*I
将此和除以11,余数为0或1则J为0,余数>=2则J为
11 - remainder
.
第二个数字 (K):(相同的计算但包括数字 J)
前10位的每一位乘以一个常数:
11*A + 10*B + 9*C + 8*D + 7*E + 6*F + 5*G + 4*H + 3*I + 2*J
将此和除以11,余数为0或1,则K为0。余数>=2,则K为
11 - remainder
。
--在 MS Excel 中的实现--
假设CPF在A2.
欢迎在这里进行优化,但不是这个问题的重点。
数字 J:=IF(MOD(SUM(MID($A2,1,1)*10,MID($A2,2,1)*9,MID($A2,3,1)*8,MID($A2,4,1)*7,MID($A2,5,1)*6,MID($A2,6,1)*5,MID($A2,7,1)*4,MID($A2,8,1)*3,MID($A2,9,1)*2),11)<=1,NUMBERVALUE(LEFT(RIGHT($A2,2),1))=0,NUMBERVALUE(LEFT(RIGHT($A2,2),1))=(11-MOD(SUM(MID($A2,1,1)*10,MID($A2,2,1)*9,MID($A2,3,1)*8,MID($A2,4,1)*7,MID($A2,5,1)*6,MID($A2,6,1)*5,MID($A2,7,1)*4,MID($A2,8,1)*3,MID($A2,9,1)*2),11)))
数字K:
=IF(MOD(SUM(MID($A2,1,1)*11,MID($A2,2,1)*10,MID($A2,3,1)*9,MID($A2,4,1)*8,MID($A2,5,1)*7,MID($A2,6,1)*6,MID($A2,7,1)*5,MID($A2,8,1)*4,MID($A2,9,1)*3,MID($A2,10,1)*2),11)<=1,NUMBERVALUE(LEFT(RIGHT($A2,1),1))=0,NUMBERVALUE(LEFT(RIGHT($A2,1),1))=(11-MOD(SUM(MID($A2,1,1)*11,MID($A2,2,1)*10,MID($A2,3,1)*9,MID($A2,4,1)*8,MID($A2,5,1)*7,MID($A2,6,1)*6,MID($A2,7,1)*5,MID($A2,8,1)*4,MID($A2,9,1)*3,MID($A2,10,1)*2),11)))
假设您有一个 table,其中有一个 id 主键列和一个数据类型为 NUMBER(9,0) 的 cpf 列(只存放前 9 位数字),那么类似于:
-- Derive both CPF check digits (j, k) from the nine base digits held in your_table.cpf.
WITH base_digits AS (
    -- Peel off each decimal digit, most significant (d1) to least significant (d9).
    SELECT id,
           MOD( TRUNC( cpf / 1e8 ), 10 ) AS d1,
           MOD( TRUNC( cpf / 1e7 ), 10 ) AS d2,
           MOD( TRUNC( cpf / 1e6 ), 10 ) AS d3,
           MOD( TRUNC( cpf / 1e5 ), 10 ) AS d4,
           MOD( TRUNC( cpf / 1e4 ), 10 ) AS d5,
           MOD( TRUNC( cpf / 1e3 ), 10 ) AS d6,
           MOD( TRUNC( cpf / 1e2 ), 10 ) AS d7,
           MOD( TRUNC( cpf / 1e1 ), 10 ) AS d8,
           MOD( TRUNC( cpf / 1e0 ), 10 ) AS d9
    FROM your_table
),
weighted AS (
    -- rem_j:     weighted sum (weights 10..2) modulo 11.
    -- partial_k: weighted sum (weights 11..3); the 2*j term is added once j is known.
    SELECT id,
           MOD( 10*d1 + 9*d2 + 8*d3 + 7*d4 + 6*d5 + 5*d6 + 4*d7 + 3*d8 + 2*d9, 11 ) AS rem_j,
           11*d1 + 10*d2 + 9*d3 + 8*d4 + 7*d5 + 6*d6 + 5*d7 + 4*d8 + 3*d9 AS partial_k
    FROM base_digits
),
first_digit AS (
    -- Remainder 0 or 1 maps to 0, anything larger maps to 11 - remainder.
    SELECT id,
           CASE WHEN rem_j > 1 THEN 11 - rem_j
                WHEN rem_j <= 1 THEN 0
           END AS j,
           partial_k
    FROM weighted
),
second_digit AS (
    SELECT id,
           j,
           MOD( partial_k + 2 * j, 11 ) AS rem_k
    FROM first_digit
)
SELECT id,
       j,
       CASE WHEN rem_k > 1 THEN 11 - rem_k
            WHEN rem_k <= 1 THEN 0
       END AS k
FROM second_digit
我的测试table:
-- Create a table called CPF
CREATE TABLE CPF(Id integer PRIMARY KEY, No integer);
-- Create few records in this table
-- (explicit column list so the insert does not depend on column order)
INSERT INTO CPF (Id, No) VALUES (1, 12345678901);
我的嵌套 query:
-- Validate both CPF check digits for every row of table CPF.
-- Output columns: No, Jchk ('J ok!' / 'J wrong!'), Kchk ('K ok!' / 'K wrong!').
WITH padded AS (
    -- No is numeric, so leading zeros are lost; restore the 11-character form.
    -- Values outside 0..99999999999 cannot be CPFs and become NULL here
    -- (LPAD would otherwise silently truncate longer strings to 11 characters).
    SELECT No,
           CASE WHEN No BETWEEN 0 AND 99999999999
                THEN LPAD(TO_CHAR(No), 11, '0')
           END AS digits
    FROM CPF
),
split AS (
    -- Explicit TO_NUMBER instead of relying on implicit string-to-number conversion.
    SELECT No,
           TO_NUMBER(SUBSTR(digits,  1, 1)) AS A,
           TO_NUMBER(SUBSTR(digits,  2, 1)) AS B,
           TO_NUMBER(SUBSTR(digits,  3, 1)) AS C,
           TO_NUMBER(SUBSTR(digits,  4, 1)) AS D,
           TO_NUMBER(SUBSTR(digits,  5, 1)) AS E,
           TO_NUMBER(SUBSTR(digits,  6, 1)) AS F,
           TO_NUMBER(SUBSTR(digits,  7, 1)) AS G,
           TO_NUMBER(SUBSTR(digits,  8, 1)) AS H,
           TO_NUMBER(SUBSTR(digits,  9, 1)) AS I,
           TO_NUMBER(SUBSTR(digits, 10, 1)) AS J,
           TO_NUMBER(SUBSTR(digits, 11, 1)) AS K
    FROM padded
),
remainders AS (
    -- MK uses the stored J digit, as in the original query.
    SELECT No, J, K,
           MOD(10*A + 9*B + 8*C + 7*D + 6*E + 5*F + 4*G + 3*H + 2*I, 11) AS MJ,
           MOD(11*A + 10*B + 9*C + 8*D + 7*E + 6*F + 5*G + 4*H + 3*I + 2*J, 11) AS MK
    FROM split
),
expected AS (
    -- Remainder 0 or 1 gives check digit 0, otherwise 11 - remainder.
    SELECT No, J, K,
           CASE WHEN MJ < 2 THEN 0 ELSE 11 - MJ END AS J2,
           CASE WHEN MK < 2 THEN 0 ELSE 11 - MK END AS K2
    FROM remainders
)
-- Test for equality so that a NULL on either side is reported as wrong,
-- not silently passed as ok.
SELECT No,
       CASE WHEN J = J2 THEN 'J ok!' ELSE 'J wrong!' END AS Jchk,
       CASE WHEN K = K2 THEN 'K ok!' ELSE 'K wrong!' END AS Kchk
FROM expected
;
@SAR622:很好的问题,感谢算法。
这里有一个针对 SQL Server 的 T-SQL 解决方案,以防万一。请注意,Cadastro de Pessoas Físicas (CPF) 号码只能有 11 位数字(不足 11 位时在前面补零),也就是说它们不能超过 10^11-1(即 99999999999)。如果您在数据集中发现 14 位的号码,它们很可能是发给企业的 Cadastro Nacional da Pessoa Jurídica (CNPJ) 号码(或者是输入错误或其他情况)。虚假的 CPF 和 CNPJ 号码可以在此处(批量)生成并(逐个)验证。此外,该网站提供了有关由其 CNPJ 定位的企业的更多信息(将其视为隐式 CNPJ 验证)。验证 CPF 编号时,请记住检查该编号是否在 [0, 10^11-1] 范围内。您可能需要删除任何标点符号和其他无效字符(作为用户,我们往往会打错字)。
此输入 table 具有前 5 个无效 CPF 编号和后 4 个有效编号:
-- Rebuild the scratch table of sample CPF values from a clean slate.
IF OBJECT_ID('tempdb..#x') IS NOT NULL
    DROP TABLE #x;

CREATE TABLE #x (CPF BIGINT DEFAULT NULL);

-- One multi-row insert in place of nine single-row statements.
INSERT INTO #x (CPF)
VALUES (12345678900),
       (11),
       (1010101010101010),
       (11111179011525590),
       (-32081397641),
       (00000008726210061),
       (56000608314),
       (73570630706),
       (93957133564);
下面的 t-SQL 函数模块化了实现,但可能比后面的原始 t-SQL 慢。或者,您可以使用 TABLE input/output 或存储过程创建 t-SQL 函数。
-- fnIsCPF: returns 1 when @n is a valid Brazilian CPF number, otherwise 0.
--   @n  CPF as an integer; leading zeros are implied (8726210061 means 08726210061).
-- A CPF is ABCDEFGHIJK: nine base digits followed by check digits J and K.
--   J = 11 - (10A + 9B + ... + 2I) mod 11, replaced by 0 when the remainder is 0 or 1
--   K = 11 - (11A + 10B + ... + 3I + 2J) mod 11, with the same replacement rule
-- NULL, non-positive and 12+-digit inputs return 0, and so do numbers whose
-- eleven digits are all equal (they satisfy the checksum but are not valid CPFs).
-- CREATE OR ALTER (SQL Server 2016 SP1 and later) lets the script run whether or
-- not the function already exists; dbo. matches how the function is called.
CREATE OR ALTER FUNCTION dbo.fnIsCPF(@n BIGINT) RETURNS INT AS
BEGIN
    -- Reject out-of-range input first. @n % 11111111111 = 0 matches exactly the
    -- repdigits 11111111111 .. 99999999999 within the accepted range.
    IF (@n IS NULL OR @n <= 0 OR @n >= 100000000000 OR @n % 11111111111 = 0)
        RETURN 0;

    -- Parse out the nine base digits. FLOOR is kept because a literal above the
    -- INT range (10000000000) is typed as decimal, so that division is not integral.
    DECLARE @a TINYINT = FLOOR(@n / 10000000000) % 10;
    DECLARE @b TINYINT = FLOOR(@n / 1000000000) % 10;
    DECLARE @c TINYINT = FLOOR(@n / 100000000) % 10;
    DECLARE @d TINYINT = FLOOR(@n / 10000000) % 10;
    DECLARE @e TINYINT = FLOOR(@n / 1000000) % 10;
    DECLARE @f TINYINT = FLOOR(@n / 100000) % 10;
    DECLARE @g TINYINT = FLOOR(@n / 10000) % 10;
    DECLARE @h TINYINT = FLOOR(@n / 1000) % 10;
    DECLARE @i TINYINT = FLOOR(@n / 100) % 10;

    -- 11 - remainder is 11 (remainder 0) or 10 (remainder 1) exactly when the
    -- check digit must be 0; the nested NULLIFs turn both into NULL, ISNULL into 0.
    DECLARE @j TINYINT = ISNULL(NULLIF(NULLIF(11 - (10*@a + 9*@b + 8*@c + 7*@d + 6*@e + 5*@f + 4*@g + 3*@h + 2*@i) % 11, 11), 10), 0);
    DECLARE @k TINYINT = ISNULL(NULLIF(NULLIF(11 - (11*@a + 10*@b + 9*@c + 8*@d + 7*@e + 6*@f + 5*@g + 4*@h + 3*@i + 2*@j) % 11, 11), 10), 0);

    -- Compare the computed check digits with the last two digits of @n.
    RETURN CASE WHEN @j = FLOOR(@n / 10) % 10 AND @k = @n % 10 THEN 1 ELSE 0 END;
END;
输出为:
-- Run the validator over every sample row.
SELECT CPF,
       dbo.fnIsCPF(CPF) AS isValid
FROM #x
CPF isValid
12345678900 0
11 0
1010101010101010 0
11111179011525590 0
-32081397641 0
8726210061 1
56000608314 1
73570630706 1
93957133564 1
t-SQL 对于 table:
-- Set-based CPF validation over #x; yields (CPF, isValid) with isValid = 1 or 0.
WITH digits ( CPF, a, b, c, d, e, f, g, h, i ) AS (
    -- Nine base digits, most significant first (leading zeros implied).
    SELECT CPF,
           FLOOR( CPF / 10000000000) % 10,
           FLOOR( CPF / 1000000000) % 10,
           FLOOR( CPF / 100000000) % 10,
           FLOOR( CPF / 10000000) % 10,
           FLOOR( CPF / 1000000) % 10,
           FLOOR( CPF / 100000) % 10,
           FLOOR( CPF / 10000) % 10,
           FLOOR( CPF / 1000) % 10,
           FLOOR( CPF / 100) % 10
    FROM #x
),
jk ( CPF, j, k ) AS (
    -- j: first check digit (11 - remainder, with 10 and 11 mapped to 0).
    -- k: partial weighted sum for the second digit; 2*j is added in the next step.
    SELECT CPF,
           ISNULL(NULLIF(NULLIF(11 - (10*a + 9*b + 8*c + 7*d + 6*e + 5*f + 4*g + 3*h + 2*i) % 11, 11), 10), 0),
           11*a + 10*b + 9*c + 8*d + 7*e + 6*f + 5*g + 4*h + 3*i
    FROM digits
),
jk2 ( CPF, j, k ) AS (
    SELECT CPF,
           j,
           ISNULL(NULLIF(NULLIF(11 - (k + 2 * j) % 11, 11), 10), 0)
    FROM jk
)
-- Valid only if the value is a positive number of at most 11 digits, is not a
-- repdigit (11111111111 .. 99999999999 are multiples of 11111111111 and pass the
-- checksum but are not valid CPFs), and both stored check digits match.
SELECT CPF,
       isValid = CASE WHEN CPF > 0
                       AND CPF < 100000000000
                       AND CPF % 11111111111 <> 0
                       AND j = FLOOR( CPF / 10) % 10
                       AND k = CPF % 10
                      THEN 1 ELSE 0 END
FROM jk2
产生相同的输出。