Search code examples
oracle-databaseexternal-tables

how to load csv in oracle using external table


I want to load csv into oracle using external table. there is 1 column which needs to turn into a separate table using FK.

csv

    NAME,OPCO,CATEGORY,PRE_APPROVED,PRE_APPROVED_REQUESTEREXT_PRE_APPROVED_REQUESTER,EXT_PRE_APPROVED_REQUESTER,Authorized FW-Request Approver,AUTHORIZED_UPDATE_TEAM_ENTRY,WORK_INSTRUCTIONS_COMMENTS
col1,col2,col3,col4,col5,col6,col7, col8, col9
col1,col2,col3,col4,col5,col6,col7, col8, col9
col1,col2,col3,col4,col5,col6,col7, col8, col9

table1

ID  NUMBER(10,0)
NAME    VARCHAR2(50 BYTE)
OPCO    VARCHAR2(50 BYTE)
CATEGORY    VARCHAR2(50 BYTE)
PRE_APPROVED    NUMBER(1,0)
PRE_APPROVED_REQUESTER  VARCHAR2(20 BYTE)
EXT_PRE_APPROVED_REQUESTER  VARCHAR2(20 BYTE)
AUTHORIZED_UPDATE_TEAM_ENTRY    VARCHAR2(50 BYTE)
WORK_INSTRUCTIONS_COMMENTS  VARCHAR2(2000 BYTE)

table2

ID  NUMBER(10,0)
SOX_ID  NUMBER(10,0)
EMAIL   VARCHAR2(50 BYTE)

I want to be able to separate the col Authorized FW-Request Approver to table2. This is the 7th column. I want to make FK table so it is 1 to many relationship. table1 is PK, table2 has FK


Solution

  • For using external tables, you need the CREATE TABLE and CREATE ANY DIRECTORY permissions. (Example: Oracle 12c, Linux). In Oracle, create a directory.

    SQL> create directory external_tables as '/home/dbuser/datafiles';
    
    Directory created.
    

    Linux (create a sample csv file)

    $ cd /home/dbuser/datafiles
    
    $ echo "NAME,OPCO,CATEGORY,PRE_APPROVED,PRE_APPROVED_REQUESTER,EXT_PRE_APPROVED_REQUESTER,Authorized_FW_Request_Approver,AUTHORIZED_UPDATE_TEAM_ENTRY,WORK_INSTRUCTIONS_COMMENTS" > csvfile.csv
    $ echo "val11,val12,val13,val14,val15,val16,AUTHFRA_7000, val18, val19" >> csvfile.csv
    $ echo "val21,val22,val23,val24,val25,val26,AUTHFRA_7001, val28, val29" >> csvfile.csv
    $ echo "val31,val32,val33,val34,val35,val36,AUTHFRA_7002, val38, val39" >> csvfile.csv
    
    $ cat csvfile.csv 
    NAME,OPCO,CATEGORY,PRE_APPROVED,PRE_APPROVED_REQUESTER,EXT_PRE_APPROVED_REQUESTER,Authorized_FW_Request_Approver,AUTHORIZED_UPDATE_TEAM_ENTRY,WORK_INSTRUCTIONS_COMMENTS
    val11,val12,val13,val14,val15,val16,AUTHFRA_7000, val18, val19
    val21,val22,val23,val24,val25,val26,AUTHFRA_7001, val28, val29
    val31,val32,val33,val34,val35,val36,AUTHFRA_7002, val38, val39
    

    Oracle: external table

    create table external_ ( 
      NAME varchar2( 4000 )
    , OPCO varchar2( 4000 )
    , CATEGORY varchar2( 4000 )
    , PRE_APPROVED varchar2( 4000 )
    , PRE_APPROVED_REQUESTER varchar2( 4000 )
    , EXT_PRE_APPROVED_REQUESTER varchar2( 4000 )
    , Authorized_FW_Request_Approver varchar2( 4000 )
    , AUTHORIZED_UPDATE_TEAM_ENTRY varchar2( 4000 )
    , WORK_INSTRUCTIONS_COMMENTS varchar2( 4000 )
    ) 
    organization external (
      type oracle_loader 
      default directory external_tables
      access parameters 
      ( 
         records field names all files
         fields CSV without embedded record terminators
      ) 
      location 
      (
        'csvfile.csv'
      ) 
    )
    /
    

    SELECT from the external table.

    -- column aliases -> output more compact (just for this example) 
    select
      NAME                            c1    
    , OPCO                            c2
    , CATEGORY                        c3
    , PRE_APPROVED                    c4
    , PRE_APPROVED_REQUESTER          c5
    , EXT_PRE_APPROVED_REQUESTER      c6
    , Authorized_FW_Request_Approver  c7   
    , AUTHORIZED_UPDATE_TEAM_ENTRY    c8
    , WORK_INSTRUCTIONS_COMMENTS      c9
    from external_ ;
    
    -- result
    C1     C2     C3     C4     C5     C6     C7            C8     C9     
    val11  val12  val13  val14  val15  val16  AUTHFRA_7000  val18  val19  
    val21  val22  val23  val24  val25  val26  AUTHFRA_7001  val28  val29  
    val31  val32  val33  val34  val35  val36  AUTHFRA_7002  val38  val39 
    

    Then, you can create the tables TABLE1 and TABLE2, and use INSERT INTO ... SELECT ... FROM external_ for populating them.

    -- drop table table1 cascade constraints ;
    -- drop table table2 cascade constraints ;
    
    -- authfra: Authorized_FW_Request_Approver
    create table table1 (
      id number generated always as identity primary key
    -- other columns omitted
    , authfra varchar2(16 char) unique
    -- other columns omitted
    );
    
    create table table2 (
    -- other columns omitted
      authfra varchar2(16 char) references table1( authfra )
    -- other columns omitted
    );
    
    insert into table1 ( authfra )
    select distinct Authorized_FW_Request_Approver 
    from external_ ;
    
    3 rows inserted.
    
    insert into table2 ( authfra )
    select Authorized_FW_Request_Approver 
    from external_ ;
    
    3 rows inserted.
    

    Table1 / Table2 contain ...

    SQL> select * from table1 ;
    ID  AUTHFRA       
    1   AUTHFRA_7000  
    2   AUTHFRA_7001  
    3   AUTHFRA_7002  
    
    SQL> select * from table2 ;
    AUTHFRA       
    AUTHFRA_7000  
    AUTHFRA_7001  
    AUTHFRA_7002 
    

    Are you sure that TABLE1 should contain unique values for column 7? In your question, you have mentioned a PK column, and thus, I've used UNIQUE to highlight this. (I would have thought that TABLE2 should contain unique "Authorized_FW_Request_Approver" values and TABLE1 should have a foreign key constraint, referencing this column)

    However, maybe this does not really matter. As long as you are able to SELECT all necessary data from the external table, populating the "destination" tables should not be too difficult.