java - Error when trying to create a Hive table using a custom SerDe
I have a file containing data that needs to be uploaded into a Hive table. I wrote a custom SerDe (which is a modification of the Regex SerDe available in Hive) to help me upload the data.
This is the SerDe I wrote:
package my.hive.customserde; public class fiasc2 extends abstractserde { public static final log log = logfactory.getlog(fiasc2.class.getname()); int colwidths[] = {1, 10, 6, 12, 8, 14, 16, 6, 6, 2, 10, 10, 19, 2, 2, 6, 8, 1}; string outputformat = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s %12$s %13$s %14$s %15$s " + "%16$s %17$s %18$s"; int datetimecols[] = {5}; int datecols[] = {17}; string cols; int numcolumns; int totalcolwidth = 0; list<string> columnnames; list<typeinfo> columntypes; arraylist<string> row; structobjectinspector rowoi; object[] outputfields; text outputrowtext; @override public void initialize(configuration conf, properties tbl) throws serdeexception { log.debug("initializing serde"); // column names string columnnameproperty = tbl.getproperty(serdeconstants.list_columns); string columntypeproperty = tbl.getproperty(serdeconstants.list_column_types); log.debug("columns : " + columnnameproperty + "types : " + columntypeproperty); if(columnnameproperty.length() == 0) { columnnames = new arraylist<string>(); } else { columnnames = arrays.aslist(columnnameproperty.split(",")); } columntypes = typeinfoutils.gettypeinfosfromtypestring(columntypeproperty); assert columnnames.size() == columntypes.size(); assert colwidths.length == columnnames.size(); numcolumns = columnnames.size(); for(int = 0; < numcolumns; i++) { totalcolwidth += i; } list<objectinspector> columnois = new arraylist<objectinspector>(columnnames.size()); (int = 0; < numcolumns; i++) { columnois.add(primitiveobjectinspectorfactory.javastringobjectinspector); } rowoi = objectinspectorfactory.getstandardstructobjectinspector(columnnames, columnois); row = new arraylist<string>(numcolumns); for(int = 0; < numcolumns; i++) { row.add(null); } outputfields = new object[numcolumns]; outputrowtext = new text(); } @override public object deserialize(writable blob) throws serdeexception { // todo auto-generated method stub text rowtext = (text) blob; int index = 0; 
if(rowtext.tostring().length() < totalcolwidth) { return null; } if((rowtext.tostring().substring(0, 1) == "h") || (rowtext.tostring().substring(0, 1) == "t")) { return null; } for(int = 0; < numcolumns; i++) { int len = colwidths[i]; string col = rowtext.tostring().substring(index, index + len); // convert datetime string correct format can uploaded hive table if(arrays.aslist(datetimecols).contains(i)) { datetimeformatconverter dtc = new datetimeformatconverter(); try { col = dtc.convertcurrtonew(col); } catch (parseexception e) { log.error("unable parse date time string : " + col); e.printstacktrace(); } } if(arrays.aslist(datecols).contains(i)) { dateformatconverter dtc = new dateformatconverter(); try { col = dtc.convertcurrtonew(col); } catch (parseexception e) { log.error("unable parse date string : " + col); e.printstacktrace(); } } row.set(i, col); index += len; } return row; } @override public objectinspector getobjectinspector() throws serdeexception { return rowoi; } @override public serdestats getserdestats() { // todo auto-generated method stub return null; } @override public class<? extends writable> getserializedclass() { return text.class; } @override public writable serialize(object obj, objectinspector objinspector) throws serdeexception { if(outputformat == null) { throw new serdeexception("cannot write table because no output format specified"); } structobjectinspector outputrowoi = (structobjectinspector) objinspector; list<? 
extends structfield> outputfieldrefs = outputrowoi.getallstructfieldrefs(); if(outputfieldrefs.size() != numcolumns) { throw new serdeexception("output format not have same number fields number of columns"); } for(int = 0; < numcolumns; i++) { object field = outputrowoi.getstructfielddata(obj, outputfieldrefs.get(i)); objectinspector fieldoi = outputfieldrefs.get(i).getfieldobjectinspector(); stringobjectinspector fieldstringoi = (stringobjectinspector) fieldoi; outputfields[i] = fieldstringoi.getprimitivejavaobject(field); } string outputrowstring = null; try { outputrowstring = string.format(outputformat, outputfields); } catch (missingformatargumentexception e) { throw new serdeexception("the table contains " + numcolumns + "columns output format requires more", e); } outputrowtext.set(outputrowstring); return outputrowtext; }
}
You can rest assured that I have imported every class that needs to be imported.
When I try to create the table, I get an error saying "Unable to get field from serde: my.hive.customserde.fiasc2".
Here is the stack trace:
2015-08-25 15:57:51,995 error [hiveserver2-background-pool: thread-57]: metadata.table (table.java:getcols(608)) - unable field serde: my.hive.customserde.fiasc2 java.lang.nullpointerexception @ org.apache.hadoop.hive.metastore.metastoreutils.getfieldsfromdeserializer(metastoreutils.java:1257) @ org.apache.hadoop.hive.ql.metadata.table.getcols(table.java:605) @ org.apache.hadoop.hive.ql.metadata.hive.createtable(hive.java:694) @ org.apache.hadoop.hive.ql.exec.ddltask.createtable(ddltask.java:4135) @ org.apache.hadoop.hive.ql.exec.ddltask.execute(ddltask.java:306) @ org.apache.hadoop.hive.ql.exec.task.executetask(task.java:160) @ org.apache.hadoop.hive.ql.exec.taskrunner.runsequential(taskrunner.java:88) @ org.apache.hadoop.hive.ql.driver.launchtask(driver.java:1653) @ org.apache.hadoop.hive.ql.driver.execute(driver.java:1412) @ org.apache.hadoop.hive.ql.driver.runinternal(driver.java:1195) @ org.apache.hadoop.hive.ql.driver.run(driver.java:1059) @ org.apache.hadoop.hive.ql.driver.run(driver.java:1054) @ org.apache.hive.service.cli.operation.sqloperation.runquery(sqloperation.java:154) @ org.apache.hive.service.cli.operation.sqloperation.access$100(sqloperation.java:71) @ org.apache.hive.service.cli.operation.sqloperation$1$1.run(sqloperation.java:206) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:422) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1657) @ org.apache.hive.service.cli.operation.sqloperation$1.run(sqloperation.java:218) @ java.util.concurrent.executors$runnableadapter.call(executors.java:511) @ java.util.concurrent.futuretask.run(futuretask.java:266) @ java.util.concurrent.threadpoolexecutor.runworker(threadpoolexecutor.java:1142) @ java.util.concurrent.threadpoolexecutor$worker.run(threadpoolexecutor.java:617) @ java.lang.thread.run(thread.java:745) 2015-08-25 15:57:51,996 error [hiveserver2-background-pool: thread-57]: exec.ddltask 
(ddltask.java:failed(520)) - org.apache.hadoop.hive.ql.metadata.hiveexception: java.lang.nullpointerexception @ org.apache.hadoop.hive.ql.metadata.hive.createtable(hive.java:720) @ org.apache.hadoop.hive.ql.exec.ddltask.createtable(ddltask.java:4135) @ org.apache.hadoop.hive.ql.exec.ddltask.execute(ddltask.java:306) @ org.apache.hadoop.hive.ql.exec.task.executetask(task.java:160) @ org.apache.hadoop.hive.ql.exec.taskrunner.runsequential(taskrunner.java:88) @ org.apache.hadoop.hive.ql.driver.launchtask(driver.java:1653) @ org.apache.hadoop.hive.ql.driver.execute(driver.java:1412) @ org.apache.hadoop.hive.ql.driver.runinternal(driver.java:1195) @ org.apache.hadoop.hive.ql.driver.run(driver.java:1059) @ org.apache.hadoop.hive.ql.driver.run(driver.java:1054) @ org.apache.hive.service.cli.operation.sqloperation.runquery(sqloperation.java:154) @ org.apache.hive.service.cli.operation.sqloperation.access$100(sqloperation.java:71) @ org.apache.hive.service.cli.operation.sqloperation$1$1.run(sqloperation.java:206) @ java.security.accesscontroller.doprivileged(native method) @ javax.security.auth.subject.doas(subject.java:422) @ org.apache.hadoop.security.usergroupinformation.doas(usergroupinformation.java:1657) @ org.apache.hive.service.cli.operation.sqloperation$1.run(sqloperation.java:218) @ java.util.concurrent.executors$runnableadapter.call(executors.java:511) @ java.util.concurrent.futuretask.run(futuretask.java:266) @ java.util.concurrent.threadpoolexecutor.runworker(threadpoolexecutor.java:1142) @ java.util.concurrent.threadpoolexecutor$worker.run(threadpoolexecutor.java:617) @ java.lang.thread.run(thread.java:745) caused by: java.lang.nullpointerexception @ org.apache.hadoop.hive.metastore.metastoreutils.getfieldsfromdeserializer(metastoreutils.java:1257) @ org.apache.hadoop.hive.ql.metadata.hive.createtable(hive.java:695) ... 21 more
I understand that the table creation failed. Does anyone know why I am getting this error? I tried googling, but didn't find a lot of help.
In case it is of any help, here is the create table script I am using:
-- Fixed-width external data backed by the custom SerDe; the 18 column
-- declarations here must line up (order and count) with colwidths[] in
-- my.hive.customserde.fiasc2.
create table if not exists fiasc2( record_type varchar(1), fin_id varchar(16), corp_id varchar(8), merc_id varchar(16), term_id varchar(8), tran_time timestamp, cashcard_number varchar(16), ttc varchar(8), tcc varchar(8), tran_type varchar(2), tran_amount varchar(16), deposit_amount varchar(16), pan varchar(32), account_type varchar(2), response_code varchar(2), card_balance varchar(8), settlement_date date, tran_mode varchar(1)) row format serde 'my.hive.customserde.fiasc2' location '/user/hive/fiasc2_test';
This sounds familiar — you are returning null.
SerDeStats is the one thing I've seen that may be null and rings a bell — I think I had the same issue with a JSON SerDe when SerDeStats was introduced a few versions of Hive ago. Try:
// add members private serdestats stats; // ... public void initialize(configuration conf, properties tbl) throws serdeexception { .. // add in initialize() stats = new serdestats(); // , of course here @override public serdestats getserdestats() { return stats; }
Comments
Post a Comment