, Guest>> || || |
/ Oracle
  ""   [new]
Alexus12
Member

:
: 2868
, :

       1, 2, 3, ...
1/1       ....
2/1    ....
2/1   "" ....
2/1    ....
3/1  "  " ....
4/1     ....


"" ,
- : , ,
?

REGEXP_REPLACE, , , / .., "" ?
?
30 12, 18:37[13399054]      |
 Re: ""   [new]

Guest
.
30 12, 18:42[13399095]      |
 Re: ""   [new]
comphead
Member

:
: 3390
Alexus12,

UTL_MATCH

http://docs.oracle.com/cd/E14072_01/appdev.112/e10577/u_match.htm
30 12, 18:43[13399098]      |
 Re: ""   [new]
Alexus12
Member

:
: 2868
! UTL_MATCH , // / - ?
31 12, 10:02[13400645]      |
 Re: ""   [new]
orawish
Member

: -2 (City)
: 15487
Alexus12
! UTL_MATCH , // / - ?

31 12, 12:52[13401756]      |
 Re: ""   [new]
hexcept
Member

:
: 237
  
 '   " +"' 
 '     +  '?

:
+
-- drop table nen_cut_dict
/
create table nen_cut_dict
(
 phrase varchar2(400) not null unique check (phrase=upper(phrase)),
 abbrev varchar2(20) not null unique check (abbrev=upper(abbrev)),
 s_mask varchar2(512) not null check (s_mask=upper(s_mask)),
 r_mask varchar2(512) not null check (r_mask=upper(r_mask)),
 p_vari integer default 0 not null check (p_vari in (0,1))
) --tablespace <tablespace_name>
/
comment on column nen_cut_dict.phrase is '  /'
/
comment on column nen_cut_dict.abbrev is '/'
/
comment on column nen_cut_dict.s_mask is 'regexp- '
/
comment on column nen_cut_dict.r_mask is 'regexp- '
/
comment on column nen_cut_dict.p_vari is ' : 0 -  , 1 -  '
/
grant select on nen_cut_dict to public
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('    ','',
 '(^|\W)\W*[]+\W+\W+[]+\W+[]+\W+(\W|$)',
 '\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)[]+\W+[]+\W+[]+\W+[]+(\W|$)',
 '\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)[E]+\W+[]+\W+[]+\W+\W*[]+(\W|$)',
 '\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)[]+(\W+)?\W+[]+[]+\W+[]+(\W|$)',
	'\1\3',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)[]+(\W+)?\W+[]+[]+\W+[]+(\W|$)',
	'\1\3',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)\W*[]+(\W+)?\W+[]+[]+\W+[]+(\W|$)',
	'\1\3',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)[]+\W+[]+\W+[]+\W+[]+(\W|$)',
	'\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)[]+\W+\W*[]+\W+[]+\W+(\W|$)',
	'\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('   ','',
 '(^|\W)[]+\W+\W*[]+\W+[]+\W+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+\W*[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)\W?[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)\W*[]+\W+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)[]+\W+[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)\W*[]+\W+[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('  ','',
 '(^|\W)\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',0)
/
/*
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
*/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W*[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W*?\W*[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+\W*[]+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)\W*[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]*\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)\W*[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]*\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+\W*[]+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('- ','',
 '(^|\W)[]+\W+[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+\W*[]+(\W|$)','\1\2',1)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+?(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values (' ','',
 '(^|\W)[]+\W+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('','',
 '(^|\W)[]+\W*[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('','',
 '(^|\W)[]+(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('','',
 '(^|\W)(\W|$)','\1\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('LIMITED','LTD',
 '(^|\W)LIMITED(\W|$)','\1LTD\2',0)
/
insert into nen_cut_dict(phrase,abbrev,s_mask,r_mask,p_vari)
values ('','',
 '(^|\W)(\W|$)','\1\2',0)
/
commit
/

. NEN():
+
create or replace
function nen(
-- Normalization of Entity Name: <:.> for Oracle 10g+;
--   .    nen_cut_dict:
--    ,     ,
--     ,
--    (,,etc.)      ':';
-- hexcept, hexcept@gmail.com;
-- ps  4 --  ( ): +,,-  &
--  ,   ,   (
--  );
 p_nm varchar2,
 p_mode integer default 0 --  :
--              0 -         ':',
--           -     (  ':')
  ) return varchar2
 is
 l_nm varchar2(2000):=upper(trim(p_nm));
 l_sign_nm varchar2(2000):=null; --   ( ,  )
 l_own_patt varchar2(2000):=null; --  
 l_state pls_integer:=0; --  
 l_abbr varchar2(2000):=null; --  
 l_modi_nm varchar2(2000):=null; --   

 cursor cd_cur( c_nm varchar2,c_abbr varchar2 ) is
 select phrase,abbrev,s_mask,r_mask,p_vari from nen_cut_dict
 where
--        
-- (.. instr('   ',' ')>0):
  instr(c_nm,' '||abbrev)>0
--      
-- (.. '   ""'->'', instr('','')>0):
  or instr(c_abbr,abbrev)>0
  or instr(c_nm,' '||phrase)>0 --      
  or --  :
-- '   ',
-- '  ' -> 'OOO';
-- ' ','' -> ''
  (
   nvl(length(ltrim(rtrim(abbrev,c_abbr),c_abbr)),1)
    =case when length(abbrev)>2 then 1 end --  ""   
--       -      
   and nvl(length(replace(translate(' '||phrase,
    ' '||replace(c_nm,' '),' '),' ')),1)=1
   and
   ( --  2-     :
    abbrev||abbrev like '%'||regexp_replace(c_abbr,'(.)','\1%')
    or c_abbr||c_abbr like '%'||regexp_replace(abbrev,'(.)','\1%')
   )
  )
 order by
  decode(p_vari,1,least( --   -,      :
   decode(instr(replace(c_nm,'-',' '),' '||abbrev||' '),
   0,length(c_nm),instr(replace(c_nm,'-',' '),' '||abbrev||' ')),
   decode(instr(replace(c_nm,'-',' '),' '||phrase||' '),
   0,length(c_nm),instr(replace(c_nm,'-',' '),' '||phrase||' '))),0),
  length(phrase)-nvl(length(replace(phrase,' ')),0) desc, --  
  length(phrase) desc, --  
  phrase;

begin
 if l_nm is null then return ':'; end if;

 if instr(l_nm,'"',1,3)>0 then
--      2-,  ,    :
  l_sign_nm:=regexp_substr(l_nm,'".*"');
 elsif instr(l_nm,'"',1,2)>0 then --   2,  -  substr/instr
  l_sign_nm:=substr(l_nm,instr(l_nm,'"',1,1),
   instr(l_nm,'"',1,2)-instr(l_nm,'"',1,1)+1);
 end if;
--    (,     ):
 if l_sign_nm is not null then
--   ,  ; Att:    
--  'v' (  ),      -  :
  l_nm:=replace(l_nm,l_sign_nm,' v ');
  l_sign_nm:=trim(regexp_replace(replace(replace(replace(regexp_replace(
   l_sign_nm,'\W*([+&-])\W*','\1'),'+','+ '),'',' '),'&',' & '),
   '[^[:alnum:]+&-]+',' '));
 end if;
-- ""    :
 if l_nm is null then l_nm:=' ';
 else
  l_nm:=case when l_nm is null then ' ' else
   ' '||trim(regexp_replace(replace(replace(replace(regexp_replace(l_nm,
   '\W*([+-])\W*','\1'),'+','+ '),'',' '),
   '&',' & '),'[^[:alnum:]+&-]+',' '))||' ' end;
 end if;
 l_modi_nm:=l_nm; --      
 l_state:=1;
--     :
 while l_state>0 loop
  if l_state=1 then
   if trim(replace(l_nm,'v')) is null then l_state:=0; -- ,  
   else l_state:=2;
   end if;
  elsif l_state=2 then
   l_abbr:=
    replace(translate(initcap(replace(l_nm,'v')),' '||lower(l_nm),' '),' ');
   l_state:=3;
  elsif l_state=3 then
   for o in cd_cur(replace(l_nm,' v ',' '),l_abbr) loop
    if instr(replace(l_nm,'-',' '),' '||o.abbrev||' ')>0 then
--      , :
     l_nm:=replace(replace(replace(replace(l_nm,' '||o.abbrev||' ',' '),
      '-'||o.abbrev||' ',' '),' '||o.abbrev||'-',' '),'-'||o.abbrev||'-','-');
     l_state:=1;
    elsif instr(l_nm,' '||o.phrase||' ')>0 then --    
     l_nm:=replace(l_nm,' '||o.phrase||' ',' ');
     l_modi_nm:=replace(l_modi_nm,' '||o.phrase||' ',' '||o.abbrev||' ');
     l_state:=1;
    elsif regexp_like(l_nm,o.s_mask) then --     
     l_nm:=regexp_replace(l_nm,o.s_mask,' ');
     l_modi_nm:=regexp_replace(l_modi_nm,o.s_mask,o.r_mask);
     l_state:=1;
    end if;
    if l_state=1 then --  -   
     if o.p_vari=1 and l_own_patt is null then
--       ,  ( ):
      l_own_patt:=o.abbrev;
      if p_mode=0 then
       l_modi_nm:=replace(replace(replace(replace(l_modi_nm,
        ' '||o.abbrev||' ',' '),'-'||o.abbrev||' ',' '),' '||o.abbrev||'-',' '),
        '-'||o.abbrev||'-','-');
      end if;
     end if;
     exit; --   
    end if;
   end loop;
--     /   (..   ) - :
   if l_state=3 then l_state:=0; end if;
  end if;
 end loop;
 l_nm:=l_modi_nm;

 return trim(replace(l_nm,'v',l_sign_nm))||':'||l_own_patt;
end;

- NEN():
+
with u as
(
 select
  '   " +"' k1,
--  '     " + "' k2
  '     +  ' k2
 from dual
),
n as ( select nen(k1) nk1,nen(k2) nk2 from u )
select
 nk1, --   -1
 nk2, --   -2
--   :
 case when nk1=nk2 then ' ' else '  ' end eq,
--        :
 case when
--  nk1,   ,    :
  rpad(nk1||' ',(length(nk1)-length(replace(replace(nk1,' '),':'))+1)*length(nk1||' '),nk1||' ')
  like '%'||replace(replace(nk2,' ','%'),':','%')||'%'
  and
--  nk2,   ,    :
  rpad(nk2||' ',(length(nk2)-length(replace(replace(nk2,' '),':'))+1)*length(nk2||' '),nk2||' ')
  like '%'||replace(replace(nk1,' ','%'),':','%')||'%'
  then '  '
  else ' ' end diff
from n
2 12, 12:14[13413463]      |
/ Oracle