Gene data processing, part 85: adam

1. Introduction:
cs-bwamem depends on adam-0.14.0. Alignments stored with adamSave under that version cannot be read back with adam-0.18.2.

2. Same-version vs. cross-version reads:
Data written by adamSave in adam-0.18.2 can be read back with loadParquetAlignments of the same version, but loadParquetAlignments in adam-0.18.2 cannot read data written by adam-0.14.2 (see the footer-inspection sketch below).
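
To see what actually diverged between the two formats, you can dump the Avro schema that the writer embedded in the Parquet footer of each .adam directory. The sketch below is not from the original experiment; it assumes the parquet-hadoop 1.8.x-era ParquetFileReader.readFooter API and that the writer stored its schema under the parquet.avro.schema key (older parquet-avro writers used avro.schema):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path
    import org.apache.parquet.hadoop.ParquetFileReader

    object InspectFooter {
      def main(args: Array[String]) {
        // args(0): one Parquet part-file inside the .adam directory
        val footer = ParquetFileReader.readFooter(new Configuration(), new Path(args(0)))
        val meta = footer.getFileMetaData
        // The Avro schema the writer used. Diffing this between a file written
        // by adam-0.14.x and one written by adam-0.18.x shows how the
        // AlignmentRecord fields moved.
        println(meta.getKeyValueMetaData.get("parquet.avro.schema"))
        // The low-level Parquet message type, for cross-checking column layout.
        println(meta.getSchema)
      }
    }

If the two dumps disagree on field order or field types, neither version can decode the other's files with its own generated AlignmentRecord class, which is exactly what the logs in section 4 show.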

3. Code:

    import java.nio.file.Files
    import java.text.SimpleDateFormat
    import java.util._

    import org.apache.parquet.hadoop.metadata.CompressionCodecName
    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.SQLContext
    import org.apache.spark.{SparkConf, SparkContext}
    import org.bdgenomics.adam.rdd.{ADAMSaveAnyArgs, ADAMContext}
    import org.bdgenomics.adam.rdd.ADAMContext._
    import org.bdgenomics.formats.avro.AlignmentRecord
    //import org.bdgenomics.avocado.AvocadoFunSuite

    object adamSaveTest {
      // Resolve a test resource on the classpath (reconstructed; this line was garbled in the source).
      def resourcePath(path: String) = ClassLoader.getSystemClassLoader.getResource(path).getFile

      def tmpFile(path: String) = Files.createTempDirectory("").toAbsolutePath.toString + "/" + path

      def main(args: Array[String]) {
        println("start:")
        val conf = new SparkConf()
          .setAppName(this.getClass().getSimpleName().filter(!_.equals('$')))
          .setMaster("local[4]")
        val sc = new SparkContext(conf)
        val ac = new ADAMContext(sc)
        val sqlContext = new SQLContext(sc)

        // The path prefix was lost in the source ("&#aligned.sam"); only the file name survives.
        val samFile = "aligned.sam"
        val rdd = sc.loadAlignments(resourcePath(samFile))
        println(rdd.count())
        rdd.foreach(println)

        // Timestamped output directory so repeated runs do not collide.
        val iString = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
        val output = "D:/all/idea/gcdss-master/file/learning/test/output/adamSaveTest" + iString
        val save = new TestSaveArgs(output)
        rdd.adamSave(save)

        // Round trip: reading back with the same ADAM version works.
        val rdd2 = sc.loadParquetAlignments(output)
        println("rdd2:" + rdd2.count)

        // Reading a file written by adam-0.14.x fails (see the log in section 4):
        //    val samFile2 = "hdfs://219.219.220.149:9000/xubo/alignment/output/g38L100c50Nhs20upload2.adam/0"
        //    val rdd3 = sc.loadParquetAlignments(samFile2)
        //    println("rdd3:" + rdd3.count)
        println("end")
        sc.stop()
      }

      // Minimal implementation of ADAMSaveAnyArgs so adamSave can be called directly.
      case class TestSaveArgs(var outputPath: String) extends ADAMSaveAnyArgs {
        var sortFastqOutput = false
        var asSingleFile = false
        var blockSize = 128 * 1024 * 1024
        var pageSize = 1 * 1024 * 1024
        var compressionCodec = CompressionCodecName.GZIP
        //    var logLevel = "SEVERE"
        var disableDictionaryEncoding = false
      }

      //  def adamSave(filePath: String, blockSize: Int = 128 * 1024 * 1024,
      //               pageSize: Int = 1 * 1024 * 1024, compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
      //               disableDictionaryEncoding: Boolean = false) {}
    }

4. Log:

(1) adamSaveTest (local run). The 10 below is the output of rdd.count(), followed by the ten records:

10
{"readNum": 0, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 0, "oldPosition": 10, "end": 70, "mapq": 100, "readName": "read2", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGGAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "34M10D26M", "oldCigar": "44M10D16M", "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tOP:i:11tNM:i:10tRG:Z:read_group_idtOC:Z:44M10D16M", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 110, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": 111}
{"readNum": 0, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 5, "oldPosition": null, "end": 75, "mapq": 90, "readName": "read1", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "29M10D31M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "29^GGGGGGGGGG10G0G0G0G0G0G0G0G0G0G11", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tNM:i:20tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 105, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": 101}
{"readNum": 0, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 10, "oldPosition": 20, "end": 80, "mapq": 100, "readName": "read4", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGGAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "24M10D36M", "oldCigar": "34M10D26M", "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": null, "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tOP:i:21tNM:i:10tRG:Z:read_group_idtOC:Z:34M10D26M", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 120, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": 111}
{"readNum": 0, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 15, "oldPosition": null, "end": 85, "mapq": 90, "readName": "read3", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "19M10D41M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "19^GGGGGGGGGG10G0G0G0G0G0G0G0G0G0G21", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tNM:i:20tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 115, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": 101}
{"readNum": 0, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 25, "oldPosition": null, "end": 95, "mapq": 90, "readName": "read5", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "9M10D51M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "9^GGGGGGGGGG10G0G0G0G0G0G0G0G0G0G31", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tNM:i:20tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 125, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": 101}
{"readNum": 1, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 105, "oldPosition": null, "end": 165, "mapq": 90, "readName": "read1", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "60M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "60", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tNM:i:0tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 5, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": -101}
{"readNum": 1, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 110, "oldPosition": null, "end": 170, "mapq": 90, "readName": "read2", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "60M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "60", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:100tNM:i:0tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 0, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": -111}
{"readNum": 1, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 115, "oldPosition": null, "end": 175, "mapq": 90, "readName": "read3", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "60M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "60", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tNM:i:0tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 15, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": -101}
{"readNum": 1, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 120, "oldPosition": null, "end": 180, "mapq": 90, "readName": "read4", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "60M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "60", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:100tNM:i:0tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 10, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": -111}
{"readNum": 1, "contig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 125, "oldPosition": null, "end": 185, "mapq": 90, "readName": "read5", "sequence": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "qual": "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "cigar": "60M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": true, "properPair": true, "readMapped": true, "mateMapped": true, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "60", "origQual": null, "attributes": "XS:i:70tAS:i:70tMQ:i:90tNM:i:0tRG:Z:read_group_id", "recordGroupName": "read_group_id", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": "library", "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": "illumina", "recordGroupPlatformUnit": "platform_unit", "recordGroupSample": "sequencing_center", "mateAlignmentStart": 25, "mateAlignmentEnd": null, "mateContig": {"contigName": "artificial", "contigLength": 1120, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "inferredInsertSize": -101}
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
rdd2:10
(This run also executed the rdd3 read that is commented out in the listing above, i.e. loading the adam-0.14-written file from HDFS; that read is what fails below.)

2016-06-09 19:25:36 ERROR Executor:96 Exception in task 0.0 in stage 4.0 (TID 4)
org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/output/g38L100c50Nhs20upload2.adam/….parquet
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:228)
    at org.apache.parquet.hadoop.ParquetRecordReader.nextKeyValue(ParquetRecordReader.java:201)
    at org.apache.spark.rdd.NewHadoopRDD$$anon$1.hasNext(NewHadoopRDD.scala:163)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1553)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1125)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1125)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1850)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1850)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:88)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.ClassCastException: org.bdgenomics.formats.avro.Contig cannot be cast to java.lang.Integer
    at org.bdgenomics.formats.avro.AlignmentRecord.put(AlignmentRecord.java:257)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.set(AvroIndexedRecordConverter.java:157)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.access$000(AvroIndexedRecordConverter.java:42)
    at org.apache.parquet.avro.AvroIndexedRecordConverter$1.add(AvroIndexedRecordConverter.java:92)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.end(AvroIndexedRecordConverter.java:177)
    at org.apache.parquet.io.RecordReaderImplementation.read(RecordReaderImplementation.java:413)
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:209)
    ... 15 more
2016-06-09 19:25:36 WARN  TaskSetManager:71 Lost task 0.0 in stage 4.0 (TID 4, localhost): org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/output/g38L100c50Nhs20upload2.adam/….parquet
    [stack trace and cause identical to the ERROR above]
2016-06-09 19:25:36 ERROR TaskSetManager:75 Task 0 in stage 4.0 failed 1 times; aborting job
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 4.0 failed 1 times, most recent failure: Lost task 0.0 in stage 4.0 (TID 4, localhost): org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/output/g38L100c50Nhs20upload2.adam/….parquet
    [executor stack trace and ClassCastException cause as above]
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1850)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1921)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1125)
    at …st.adamSaveTest$.main(adamSaveTest.scala:47)
    at …st.adamSaveTest.main(adamSaveTest.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/output/g38L100c50Nhs20upload2.adam/….parquet
    [executor stack trace as above]
Caused by: java.lang.ClassCastException: org.bdgenomics.formats.avro.Contig cannot be cast to java.lang.Integer
    [frames as in the first cause above]
    ... 15 more
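
Both failures bottom out in AlignmentRecord.put throwing a ClassCastException (Contig versus Integer here, String versus Long in the adam-shell run below). A plausible reading of the trace: parquet-avro's AvroIndexedRecordConverter pushes decoded columns into the record by field index taken from the schema stored in the file, while the AlignmentRecord class on the classpath comes from the newer bdg-formats, where the same indices belong to different fields, so a value decoded for one field lands in another field's slot. A toy illustration with plain Avro generic records (hypothetical schemas, nothing ADAM-specific):

    import org.apache.avro.Schema
    import org.apache.avro.generic.GenericData

    object PositionalMismatch {
      // Stand-ins for AlignmentRecord as written by an old version and as
      // expected by a newer one: same fields, different order.
      val writerSchema = new Schema.Parser().parse(
        """{"type":"record","name":"R","fields":[
          |{"name":"contigName","type":"string"},
          |{"name":"start","type":"long"}]}""".stripMargin)
      val readerSchema = new Schema.Parser().parse(
        """{"type":"record","name":"R","fields":[
          |{"name":"start","type":"long"},
          |{"name":"contigName","type":"string"}]}""".stripMargin)

      def main(args: Array[String]) {
        val rec = new GenericData.Record(readerSchema)
        // A converter generated against the writer's layout assigns by position:
        // index 0 carries the string "chr1", but the reader expects a long there.
        rec.put(0, "chr1")
        rec.put(1, 100L)
        // GenericData.Record.put is untyped, so the damage only surfaces on the
        // typed read, mirroring the ClassCastExceptions in the logs. (The
        // generated AlignmentRecord.put is typed, so there it throws
        // immediately, at AlignmentRecord.java:257/261.)
        val start: Long = rec.get(0).asInstanceOf[Long] // ClassCastException
        println(start)
      }
    }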

(2) adam-shell:

scala> val file = "hdfs://219.219.220.149:9000/xubo/alignment/adam"
file: String = hdfs://219.219.220.149:9000/xubo/alignment/adam

scala> val rdd2 = sc.loadParquetAlignments(file)
rdd2: org.apache.spark.rdd.RDD[org.bdgenomics.formats.avro.AlignmentRecord] = MapPartitionsRDD[7] at map at ADAMContext.scala:167

scala> rdd2.count
[Stage 3:>                                                         (0 + 2) / 18]16/06/09 17:17:54 ERROR Executor: Exception in task 1.0 in stage 3.0 (TID 4)
org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/….parquet
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:228)
    at org.apache.parquet.hadoop.ParquetRecordReader.nextKeyValue(ParquetRecordReader.java:201)
    at org.apache.spark.rdd.NewHadoopRDD$$anon$1.hasNext(NewHadoopRDD.scala:163)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1553)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1125)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1125)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1850)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1850)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:88)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassCastException: java.lang.String cannot be cast to java.lang.Long
    at org.bdgenomics.formats.avro.AlignmentRecord.put(AlignmentRecord.java:261)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.set(AvroIndexedRecordConverter.java:168)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.access$000(AvroIndexedRecordConverter.java:46)
    at org.apache.parquet.avro.AvroIndexedRecordConverter$1.add(AvroIndexedRecordConverter.java:95)
    at org.apache.parquet.avro.AvroConverters$BinaryConverter.addValueFromDictionary(AvroConverters.java:81)
    at org.apache.parquet.column.impl.ColumnReaderImpl$1.writeValue(ColumnReaderImpl.java:170)
    at org.apache.parquet.column.impl.ColumnReaderImpl.writeCurrentValueToConverter(ColumnReaderImpl.java:365)
    at org.apache.parquet.io.RecordReaderImplementation.read(RecordReaderImplementation.java:405)
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:209)
    ... 15 more
16/06/09 17:17:54 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3)
org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/….parquet
    [stack trace identical to the one above]
Caused by: java.lang.ClassCastException: org.bdgenomics.formats.avro.Contig cannot be cast to java.lang.Integer
    at org.bdgenomics.formats.avro.AlignmentRecord.put(AlignmentRecord.java:257)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.set(AvroIndexedRecordConverter.java:168)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.access$000(AvroIndexedRecordConverter.java:46)
    at org.apache.parquet.avro.AvroIndexedRecordConverter$1.add(AvroIndexedRecordConverter.java:95)
    at org.apache.parquet.avro.AvroIndexedRecordConverter.end(AvroIndexedRecordConverter.java:189)
    at org.apache.parquet.io.RecordReaderImplementation.read(RecordReaderImplementation.java:413)
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:209)
    ... 15 more
[Stage 3:>                                                         (0 + 3) / 18]16/06/09 17:17:54 ERROR TaskSetManager: Task 0 in stage 3.0 failed 1 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3.0 failed 1 times, most recent failure: Lost task 0.0 in stage 3.0 (TID 3, localhost): org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/….parquet
    [executor stack trace and ClassCastException cause as above]
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1850)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1921)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1125)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:29)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:34)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:36)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
    at $iwC$$iwC$$iwC.<init>(<console>:46)
    at $iwC$$iwC.<init>(<console>:48)
    at $iwC.<init>(<console>:50)
    at <init>(<console>:52)
    at .<init>(<console>:56)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block 0 in file hdfs://219.219.220.149:9000/xubo/alignment/….parquet
    [executor stack trace as above]
Caused by: java.lang.ClassCastException: org.bdgenomics.formats.avro.Contig cannot be cast to java.lang.Integer
    [frames as in the Contig cause above]
    ... 15 more
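
Since the two layouts cannot read each other, one pragmatic migration path is to round-trip through SAM, which both versions parse. A rough sketch, assuming the adamSAMSave method that alignment RDDs exposed in this ADAM generation; the two steps must run against different classpaths (old ADAM for step 1, adam-0.18.2 for step 2):

    import org.apache.spark.{SparkConf, SparkContext}
    import org.bdgenomics.adam.rdd.ADAMContext._

    object MigrateAdam {
      def main(args: Array[String]) {
        val sc = new SparkContext(
          new SparkConf().setAppName("MigrateAdam").setMaster("local[4]"))

        // Step 1 (old ADAM on the classpath): export the Parquet alignments
        // to SAM, a version-neutral interchange format.
        val reads = sc.loadParquetAlignments("hdfs://219.219.220.149:9000/xubo/alignment/adam")
        reads.adamSAMSave("hdfs://219.219.220.149:9000/xubo/alignment/migrated.sam")

        // Step 2 (adam-0.18.2 on the classpath): re-import the SAM and save it
        // again as Parquet in the new AlignmentRecord layout, reusing the
        // TestSaveArgs case class from section 3.
        // val rdd = sc.loadAlignments("hdfs://219.219.220.149:9000/xubo/alignment/migrated.sam")
        // rdd.adamSave(TestSaveArgs("hdfs://219.219.220.149:9000/xubo/alignment/migrated.adam"))

        sc.stop()
      }
    }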

Publications:

【1】 [BIBM] Bo Xu, Changlong Li, Hang Zhuang, Jiali Wang, Qingfeng Wang, Chao Wang, and Xuehai Zhou. "Distributed Gene Clinical Decision Support System Based on Cloud Computing." IEEE International Conference on Bioinformatics and Biomedicine (BIBM 2017, CCF-B).
【2】 [IEEE CLOUD] Bo Xu, Changlong Li, Hang Zhuang, Jiali Wang, Qingfeng Wang, and Xuehai Zhou. "Efficient Distributed Smith-Waterman Algorithm Based on Apache Spark." IEEE International Conference on Cloud Computing (CLOUD 2017, CCF-C).
【3】 [CCGrid] Bo Xu, Changlong Li, Hang Zhuang, Jiali Wang, Qingfeng Wang, Jinhong Zhou, and Xuehai Zhou. "DSA: Scalable Distributed Sequence Alignment System Using SIMD Instructions." IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGrid 2017, CCF-C).
【4】more: 

Help

If you have any questions or suggestions, please open an issue in this project or send me an e-mail: xubo245@mail.ustc.edu
Wechat: xu601450868
QQ: 601450868
