I like to export large amount of text data from db to file. The characterset in db is UTF8. The excepted result in the file is ISO8859P2 or MSWIN1250.
My db settings:
SELECT * FROM v$nls_parameters;
1 NLS_LANGUAGE HUNGARIAN 0
2 NLS_TERRITORY HUNGARY 0
9 NLS_CHARACTERSET UTF8 0
10 NLS_SORT HUNGARIAN 0
16 NLS_NCHAR_CHARACTERSET UTF8 0
17 NLS_COMP BINARY 0
18 NLS_LENGTH_SEMANTICS CHAR 0
19 NLS_NCHAR_CONV_EXCP FALSE 0
select * from nls_database_parameters;
1 NLS_RDBMS_VERSION 12.1.0.2.0
2 NLS_NCHAR_CONV_EXCP FALSE
15 NLS_NCHAR_CHARACTERSET UTF8
16 NLS_CHARACTERSET UTF8
19 NLS_TERRITORY AMERICA
20 NLS_LANGUAGE AMERICAN
select * from nls_session_parameters;
1 NLS_LANGUAGE HUNGARIAN
2 NLS_TERRITORY HUNGARY
9 NLS_SORT HUNGARIAN
15 NLS_COMP BINARY
16 NLS_LENGTH_SEMANTICS CHAR
17 NLS_NCHAR_CONV_EXCP FALSE
The file created at server directory (linux). I haven't more information for linux characterset settings.
The PLSQL code:
DECLARE
v_fh UTL_FILE.FILE_TYPE;
v_eol VARCHAR2(2);
v_eollen PLS_INTEGER;
CURSOR cur_sql IS
SELECT T3.ID_RESULT
,T3.column1
FROM table1
WHERE id_result = 999999
;
"ID_RESULT" DBMS_SQL.NUMBER_TABLE;
"column1" DBMS_SQL.VARCHAR2A;
BEGIN
EXECUTE IMMEDIATE 'ALTER SESSION SET NLS_DATE_FORMAT = ''YYYY.MM.DD HH24:MI:SS''';
v_eol := CHR(13)||CHR(10);
v_eollen := LENGTH(v_eol);
v_fh := UTL_FILE.FOPEN('REP_DIR','result_test.csv','W', 32000);
OPEN cur_sql;
LOOP
FETCH cur_sql
BULK COLLECT INTO "ID_RESULT",
"column1",
LIMIT 1000;
IF "ID_RESULT".COUNT > 0 THEN
FOR i IN "ID_RESULT".FIRST .. "ID_RESULT".LAST LOOP
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2','UTF8'));
UTL_FILE.PUT_nchar(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250','UTF8'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, "column1"(i));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, utl_raw.cast_to_varchar2(utl_raw.convert(utl_raw.cast_to_raw("column1"(i) ),'HUNGARIAN_HUNGARY.EE8MSWIN1250', 'ENGLISH_UNITED KINGDOM.UTF8')));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.fflush(v_fh);
END LOOP;
END IF;
EXIT WHEN cur_sql%NOTFOUND;
END LOOP;
CLOSE cur_sql;
UTL_FILE.FCLOSE(v_fh);
EXCEPTION
WHEN
.........
RAISE;
END;
The original value in db: value in hexa
The result in Notepad++ (encode in UTF8):
Csere Lajosn
Csere Lajosn
Csere Lajosn
Csere Lajosn
Csere Lajosné
Csere Lajosn
The result in Notepad++ (encode in ANSI, char set: windows-1250):
Csere Lajosn
Csere Lajosn
Csere Lajosn
Csere Lajosn
Csere Lajosné
Csere Lajosn
The result in Notepad++ (encode in ANSI, char set: iso-8859-2):
Csere Lajosn
Csere Lajosn
Csere Lajosn
Csere Lajosn
Csere LajosnĂŠ
Csere Lajosn
When i use CONVERT i lost "é" character. How can i do convert the string from UTF8 to ANSI?
Thanks, Zoltan
This is from mine toolbox package - try it/tweak. Works with different code pages in-out.
function dump_dsv_fast(p_query in varchar2
,p_filename in varchar2
,p_dir in varchar2 default c_DEFAULT_DIRECTORY
,p_separator in varchar2 default ';'
,p_text_qualifier in varchar2 default ''
,p_header in boolean default true
,p_eol_format in varchar2 default chr(13) || chr(10)
,p_characterset in varchar2 default 'EE8MSWIN1250'
,p_write_mode in varchar2 default 'WB') return number
/****************************************************
Formats:
AL32UTF8 --> full utf
WE8MSWIN1252 --> no pl chars
WE8MSWIN1250 --> pl ansi
Limitations: this one is faster filing buffer once
* 2014/05/13 --> change to raw (end line character)
* 2016/02/12 --> text qualifier
--> header on/off
--> line length adjust
--> text q
* 2016/03/09 --> codepage
*****************************************************/
is
l_output utl_file.file_type;
l_theCursor integer default dbms_sql.open_cursor;
l_columnValue varchar2(32760);
l_status integer;
l_separator varchar2(10);
l_colCnt number default 0;
l_descTbl dbms_sql.desc_tab;
l_cnt number default 0;
l_buffer raw(32767) := null;
begin
l_output := utl_file.fopen(p_dir, p_filename, p_write_mode, 32760);
dbms_sql.parse(l_theCursor, p_query, dbms_sql.native);
if p_header
then
dbms_sql.describe_columns(l_theCursor, l_colCnt, l_descTbl);
l_separator := '';
for i in 1 .. l_colCnt
loop
if p_characterset = 'EE8MSWIN1250'
then
utl_file.put_raw(l_output,
utl_raw.cast_to_raw(l_separator ||
p_text_qualifier || l_descTbl(i)
.col_name || p_text_qualifier));
else
utl_file.put_raw(l_output,
utl_i18n.string_to_raw(l_separator ||
p_text_qualifier || l_descTbl(i)
.col_name ||
p_text_qualifier,
p_characterset));
end if;
dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
l_separator := p_separator;
end loop;
utl_file.put_raw(l_output, utl_raw.cast_to_raw(p_eol_format));
end if;
for i in 1 .. 255
loop
begin
dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
l_colCnt := i;
exception
when others then
if (sqlcode = -1007)
then
exit;
else
raise;
end if;
end;
end loop;
dbms_sql.define_column(l_theCursor, 1, l_columnValue, 32760);
l_status := dbms_sql.execute(l_theCursor);
loop
exit when(dbms_sql.fetch_rows(l_theCursor) <= 0);
l_separator := '';
for i in 1 .. l_colCnt
loop
dbms_sql.column_value(l_theCursor, i, l_columnValue);
if p_characterset = 'EE8MSWIN1250'
then
l_buffer := l_buffer ||
utl_raw.cast_to_raw(l_separator || p_text_qualifier ||
l_columnValue || p_text_qualifier);
else
l_buffer := l_buffer ||
utl_i18n.string_to_raw(l_separator || p_text_qualifier ||
l_columnValue ||
p_text_qualifier, p_characterset);
end if;
l_separator := p_separator;
end loop;
utl_file.put_raw(l_output,
l_buffer || utl_raw.cast_to_raw(p_eol_format));
l_buffer := '';
l_cnt := l_cnt + 1;
end loop;
dbms_sql.close_cursor(l_theCursor);
utl_file.fclose(l_output);
return l_cnt;
end dump_dsv_fast;