ORACLE 独热编码(one-hot encoding):对所有可用的列值使用 PIVOT 运算符

ORACLE one hot encoding: Use PIVOT operator for all available columns

假设我有一张名为 t1 的表,内容如下:

CLID PRODUCT
1    A
1    B
2    A
2    C
3    A
3    C

我想要实现的是对 'PRODUCT' 字段进行独热编码(one-hot encoding)。在 Oracle 中,我们至少可以这样做:

-- One-hot encode PRODUCT per CLID: one output column per listed product value.
-- The PIVOT must be applied to the inline view (outside its parentheses);
-- pivoting T1 inside a view that still selects PRODUCT fails, because the
-- pivot consumes the PRODUCT column.
-- COUNT returns the number of matching rows, so the cells are strictly 0/1
-- only when each (clid, product) pair occurs at most once.
select *
from (
    select clid, product
    from t1
)
pivot (
    count(product)
    for product
    -- aliases give plain column names A, B, C instead of 'A', 'B', 'C'
    in ('A' as a, 'B' as b, 'C' as c)
)
order by clid

然后我们得到结果:

CLID A B C
1    1 1 0
2    1 0 1 
3    1 0 1

但是当产品数量很多时(假设有 1000 种),问题就出现了:在这种情况下,把所有产品都写进 IN 子句非常不方便。

所以我的问题是:有没有办法避免把所有可能的值都写进 'IN' 子句?如果没有这样的选项,那么是否有其他方法可以在 Oracle SQL(或 PL/SQL)中进行独热编码?

无论表中有多少种不同的产品,您都可以使用 PL/SQL 匿名块自动构建查询;但需要注意,PIVOT 子句中放入的值不能超过 1000 个,因为结果的列数不能超过 1000。

我会这样做(假设不同的值始终少于 1000 个):

测试用例(创建 table 和测试值)

SQL> create table t ( CLID number , PRODUCT varchar2(10) )  ;

Table created.

SQL> insert into t ( clid , product )
  2  with x ( a , b ) as
(
select 1  ,  'A' from dual union all
select 1  ,  'B' from dual union all
select 2  ,  'A' from dual union all
select 2  ,  'C' from dual union all
select 3  ,  'A' from dual union all
select 3  ,  'C' from dual union all
select 4  ,  'D' from dual union all
select 4  ,  'E' from dual union all
select 5  ,  'B' from dual union all
select 5  ,  'C' from dual union all
select 5  ,  'D' from dual union all
select 5  ,  'E' from dual
)
select a , b from x ;

12 rows created.

SQL> commit ;

Commit complete.

PLSQL构建

然后,无论有多少种不同的产品,都可以自动生成查询:

set serveroutput on size unlimited lines 220 pages 0
-- Builds and prints (via DBMS_OUTPUT) a PIVOT query that one-hot encodes
-- T.PRODUCT: the IN list receives one quoted literal per distinct, non-null
-- PRODUCT value, in ascending order. The query is only printed, not executed.
--
-- Raises:
--   -20001  when 1000 or more distinct products exist (column limit)
--   -20002  when T holds no non-null PRODUCT value (nothing to pivot)
declare
    -- The pivot result has CLID plus one column per product, and the result
    -- may not exceed 1000 columns, so at most 999 products can be pivoted.
    c_max_products  constant pls_integer := 999;
    c_nl            constant varchar2(1) := chr(10);
    v_query         clob;
    v_product_count pls_integer := 0;

    -- Prints p_clob line by line, splitting on chr(10), because
    -- DBMS_OUTPUT.PUT_LINE works on one bounded line at a time.
    -- A NULL or empty CLOB prints nothing.
    procedure print_clob_to_output (p_clob in clob)
    is
        -- NVL: GETLENGTH returns NULL for a NULL CLOB, which would otherwise
        -- make the loop condition never become true/false as intended.
        l_length  pls_integer := nvl(dbms_lob.getlength(p_clob), 0);
        l_offset  pls_integer := 1;
        l_eol     pls_integer;
    begin
        while l_offset <= l_length
        loop
            l_eol := dbms_lob.instr(p_clob, c_nl, l_offset, 1);
            if l_eol is null or l_eol = 0 then
                -- no further newline: the last line runs to the end of the CLOB
                l_eol := l_length + 1;
            end if;
            dbms_output.put_line(dbms_lob.substr(p_clob, l_eol - l_offset, l_offset));
            l_offset := l_eol + 1;
        end loop;
    end print_clob_to_output;
begin
    dbms_output.enable(null);

    -- Fixed head of the statement, up to the opening of the IN list.
    v_query := 'select * from ('   || c_nl
            || '  select *  '      || c_nl
            || ' from t '          || c_nl
            || ' pivot( '          || c_nl
            || ' count(product) '  || c_nl
            || ' for product in (';

    -- NULL products are skipped: they cannot be matched by a pivot literal.
    for item in (
        select distinct product
        from t
        where product is not null
        order by product
    )
    loop
        v_product_count := v_product_count + 1;
        if v_product_count > c_max_products
        then
            raise_application_error(-20001,'Maximum number of 1000 columns are not allowed',true);
        end if;

        -- Separator goes before every value except the first, so a single
        -- product yields a well-formed one-element list.
        if v_product_count > 1
        then
            v_query := v_query || ' ,' || c_nl;
        end if;

        -- Double embedded single quotes so the value stays one SQL literal.
        v_query := v_query || ' ''' || replace(item.product, '''', '''''') || '''';
    end loop;

    if v_product_count = 0
    then
        raise_application_error(-20002,'No non-null PRODUCT values found in T; nothing to pivot',true);
    end if;

    -- Close the IN list, then the pivot clause and the outer inline view.
    v_query := v_query || ' ) ' || c_nl || ' ) )';

    print_clob_to_output(v_query);
end;
/

执行

SQL> @query.sql
SQL> set serveroutput on size unlimited lines 220 pages 0
SQL>     declare
  2      v_query       clob;
  3      out_string    varchar2(100);
  4      cursor c_ids
  5      is
  6      select distinct product, count(distinct(product)) over () tot_rows from t order by 1 asc;
  7      procedure print_clob_to_output (p_clob in clob)
  8          is
  9            l_offset     pls_integer := 1;
 10            l_chars      pls_integer;
 11          begin
 12              loop
 13                  exit when l_offset > dbms_lob.getlength(p_clob);
 14                  l_chars := dbms_lob.instr(p_clob, chr(10), l_offset, 1);
 15                  if l_chars is null or l_chars = 0 then
 16                      l_chars := dbms_lob.getlength(p_clob) + 1;
 17                  end if;
 18                  dbms_output.put_line(dbms_lob.substr(p_clob, l_chars - l_offset, l_offset));
 19                  l_offset := l_chars + 1;
 20              end loop;
 21          end print_clob_to_output;
 22      begin
 23          dbms_output.enable(null);
 24          for item in c_ids
 25          loop
 26              if item.tot_rows >= 1000
 27              then
 28                  raise_application_error(-20001,'Maximum number of 1000 columns are not allowed',true);
 29              end if;
 30              out_string := item.product;
 31              if c_ids%rowcount = 1
 32              then
 33                  v_query := 'select * from (';
 34                  dbms_lob.append(v_query,''||chr(10)||'');
 35                  dbms_lob.append(v_query,'  select *  ');
 36                  dbms_lob.append(v_query,''||chr(10)||'');
 37                  dbms_lob.append(v_query,' from t ');
 38                                  dbms_lob.append(v_query,''||chr(10)||'');
 39                  dbms_lob.append(v_query,' pivot( ');
 40                                  dbms_lob.append(v_query,''||chr(10)||'');
 41                  dbms_lob.append(v_query,' count(product) ');
 42                                  dbms_lob.append(v_query,''||chr(10)||'');
 43                  dbms_lob.append(v_query,' for product in ( '''||out_string||''' , ');
 44                          elsif c_ids%rowcount < item.tot_rows then
 45                  dbms_lob.append(v_query,''||chr(10)||'');
 46                  dbms_lob.append(v_query,' '''||out_string||''' ,');
 47              else
 48                  dbms_lob.append(v_query,''||chr(10)||'');
 49                  dbms_lob.append(v_query,' '''||out_string||''' ) ');
 50              end if;
 51          end loop;
 52          dbms_lob.append(v_query,''||chr(10)||'');
 53          dbms_lob.append(v_query,' ) )');
 54          print_clob_to_output(v_query);
 55      end;
 56      /
select * from (
select *
from t
pivot(
count(product)
for product in ( 'A' ,
'B' ,
'C' ,
'D' ,
'E' )
) )

PL/SQL procedure successfully completed.

SQL> select * from (
select *
from t
pivot(
count(product)
for product in ( 'A' ,
'B' ,
'C' ,
'D' ,
'E' )
) ) ;

      CLID        'A'        'B'        'C'        'D'        'E'
---------- ---------- ---------- ---------- ---------- ----------
         1          1          1          0          0          0
         2          1          0          1          0          0
         4          0          0          0          1          1
         5          0          1          1          1          1
         3          1          0          1          0          0