最近在使用impdp导入数据的时候,因源库gbk与目标库utf8的字符集不一样,造成导入时目标库中表的字段长度不够。报错信息如下:
-
ORA-02374: conversion error loading table "HN_CM_IRMS_35_TNMS"."PTN_NE_TO_SERVICE"
-
ORA-12899: value too large for column TP_FDN (actual: 202, maximum: 200)
-
ORA-02372: data for row: TP_FDN : 0X'454D533D414E2D5433312D322D503A4D616E61676564456C65'
-
-
ORA-02374: conversion error loading table "HN_CM_IRMS_35_TNMS"."PTN_NE_TO_SERVICE"
-
ORA-12899: value too large for column TP_FDN (actual: 202, maximum: 200)
-
ORA-02372: data for row: TP_FDN : 0X'454D533D414E2D5433312D322D503A4D616E61676564456C65'
-
-
ORA-02374: conversion error loading table "HN_CM_IRMS_35_TNMS"."PTN_NE_TO_SERVICE"
-
ORA-12899: value too large for column TP_FDN (actual: 202, maximum: 200)
-
ORA-02372: data for row: TP_FDN : 0X'454D533D414E2D5433312D322D503A4D616E61676564456C65'
-
-
ORA-02374: conversion error loading table "HN_CM_IRMS_35_TNMS"."PTN_NE_TO_SERVICE"
-
ORA-12899: value too large for column TP_FDN (actual: 208, maximum: 200)
-
ORA-02372: data for row: TP_FDN : 0X'454D533D414E2D5433312D322D503A4D616E61676564456C65'
这个日志有150M,260W行。手动筛选信息肯定是不行的。
写了一个根据impdp导入日志信息中字段长度不够的错误信息自动生成修改表结构SQL的python3脚本
-
#!/usr/bin/python3
-
# -*- coding: utf-8 -*-
-
-
import os;
-
import sys;
-
import codecs;
-
import re;
-
-
def replace_file():
-
try:
-
input_filepath = '/home/oracle/impdp.log';
-
with codecs.open(input_filepath,'r','gb18030') as source_file:
-
source_lines = source_file.readlines();
-
output_filepath = '/home/oracle/mid_result.txt';
-
list_str = [];
-
tempstr = '';
-
for line in source_lines:
-
if line.startswith("ORA-02374"):
-
tempstr = line.strip();
-
elif line.startswith("ORA-12899"):
-
tempstr = tempstr + line.strip() + '\n';
-
list_str.append(tempstr);
-
else:
-
pass;
-
with codecs.open(output_filepath,'w+','utf-8') as output_file:
-
write_list(output_file,list_str);
-
print("Finish replace the file !");
-
except Exception as error:
-
print(error);
-
pass;
-
-
def create_sql():
-
try:
-
input_filepath = '/home/oracle/mid_result.txt';
-
with codecs.open(input_filepath,'r','utf-8') as source_file:
-
source_lines = source_file.readlines();
-
output_filepath = '/home/oracle/result.txt';
-
list_str = [];
-
set_table = set();
-
for line in source_lines:
-
target_len = int(re.sub(r'.*maximum:\s(\d+)\)',r'\1',line)) * 2;
-
mid_sql = line.replace('ORA-02374: conversion error loading table','ALTER TABLE').replace('ORA-12899: value too large for column',' MODIFY');
-
#list_str.append(line.replace('ORA-02374: conversion error loading table','ALTER TABLE').replace('ORA-12899: value too large for column',' MODIFY'));
-
set_table.add(re.sub(r'\(.*\)','varchar2(' + str(target_len) + ');',mid_sql));
-
with codecs.open(output_filepath,'w+','utf-8') as output_file:
-
write_set(output_file,set_table);
-
print("Finish generate sql !");
-
except Exception as error:
-
print(error);
-
pass;
-
-
def write_str(p_file, p_content):
-
try:
-
if isinstance(p_content, str):
-
p_file.write(p_content);
-
else:
-
print("it is not str");
-
except Exception as error:
-
print(error);
-
pass;
-
-
def write_list(p_file, p_content):
-
try:
-
if isinstance(p_content, list):
-
for item in p_content:
-
if isinstance(item, str):
-
write_str(p_file, item);
-
else:
-
print("Too complicated")
-
else:
-
print("it is not list");
-
except Exception as error:
-
print(error);
-
pass;
-
-
-
def write_set(p_file, p_content):
-
try:
-
if isinstance(p_content, set):
-
for item in p_content:
-
if isinstance(item, str):
-
write_str(p_file, item);
-
else:
-
print("Too complicated")
-
else:
-
print("it is not set");
-
except Exception as error:
-
print(error);
-
pass;
-
-
if __name__== '__main__':
-
replace_file();
-
create_sql();
最终生成的/home/oracle/result.txt就是用于修改表结构的SQL语句。
阅读(4435) | 评论(0) | 转发(0) |