#!/usr/bin/env python
# In this example a two-class support vector machine classifier is evaluated
# with cross-validation on a toy DNA data set. The active configuration uses
# LibSVM with regularization parameter C (1.2 by default), a Weighted Degree
# Position string kernel of degree 8 and the precision parameter epsilon=1e-5.
# The classifier is scored with 10-fold stratified cross-validation using the
# ROC evaluation criterion. An SVM^light based setup is kept as a commented-out
# alternative inside the function.
#
# For more details on the SVM^light see
#  T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
#  Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
#
# For more details on the Weighted Degree kernel see
#  G. Raetsch, S.Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
#  spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005.

from tools.load import LoadMatrix

lm = LoadMatrix()

# Alternative toy data sets; enable exactly one data/label pair.
#traindat = lm.load_dna('../../../data/toy/hssData.dat')
#label_traindat = lm.load_labels('../../../data/toy/label-hss.dat')
traindat = lm.load_dna('../../../data/toy/hss200.dat')
label_traindat = lm.load_labels('../../../data/toy/HSS200SequencesLabel.dat')
#traindat = lm.load_dna('../../../data/toy/aluSequencesData.dat')
#label_traindat = lm.load_labels('../../../data/toy/aluSequencesLabel.dat')
#traindat = lm.load_dna('../../../data/toy/Alu240.dat')
#label_traindat = lm.load_labels('../../../data/toy/AluLabel.dat')

# Each entry is one positional argument list for classifier_svmlight_modular:
# [fm_train_dna, label_train_dna, C, epsilon, num_threads]
parameter_list = [
    [traindat, label_traindat, 1.1, 1e-5, 1],
    [traindat, label_traindat, 1.2, 1e-5, 1],
]


def classifier_svmlight_modular(fm_train_dna=traindat,
                                label_train_dna=label_traindat,
                                C=1.2, epsilon=1e-5, num_threads=4):
    """Cross-validate an SVM with a string kernel on DNA data and print results.

    Parameters:
        fm_train_dna: DNA strings as returned by ``LoadMatrix.load_dna``.
        label_train_dna: labels as returned by ``LoadMatrix.load_labels``;
            they are wrapped in ``BinaryLabels``.
        C: SVM regularization parameter.
        epsilon: precision parameter passed to ``set_epsilon``.
        num_threads: only used by the commented-out SVMLight alternative;
            the active LibSVM configuration ignores it.

    Returns:
        None. Progress messages and results are printed. If SVMLight cannot
        be imported, a notice is printed and the function returns early.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ROCEvaluation
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA, MSG_DEBUG
    from shogun.Kernel import WeightedDegreePositionStringKernel
    from shogun.Classifier import LibSVM

    # NOTE: the active code trains LibSVM; SVMLight is needed only for the
    # commented-out alternative below. The availability gate is kept so that
    # the example is still skipped on builds without SVMLight, as before.
    try:
        from shogun.Classifier import SVMLight, MSG_DEBUG
    except ImportError:
        print('No support for SVMLight available.')
        return
    print("completed svm importin")

    feats_train = StringCharFeatures(DNA)
    feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.set_features(fm_train_dna)

    degree = 8
    kernel = WeightedDegreePositionStringKernel(feats_train, feats_train, degree)

    # Alternative kernel setups (need: from numpy import ones, float64, int32
    # and, for the last one, from shogun.Kernel import OligoStringKernel):
    #shift_vector = ones(len(fm_train_dna[0]),dtype=int32)
    #weight_vector = ones(len(fm_train_dna[0]), dtype=float64)
    #degree=8
    #cachesize=500
    #kernel=WeightedDegreePositionStringKernel(cachesize,weight_vector,degree,0,shift_vector,3,1)
    #kernel.init(feats_train,feats_train)
    #kernel.set_shifts(3*ones(len(fm_train_dna[0]), dtype=int32))
    #kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))
    #kernel=OligoStringKernel(10, 4, 1.2)
    #kernel.init(feats_train, feats_train)

    labels = BinaryLabels(label_train_dna)

    # Alternative classifier (this is where num_threads would be used):
    #svm=SVMLight(C, kernel, labels)
    #svm.set_epsilon(epsilon)
    #svm.parallel.set_num_threads(num_threads)
    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    print("completed svm initialization")
    svm.io.set_loglevel(MSG_DEBUG)

    # splitting strategy for 10-fold cross-validation, stratified by label
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 10)

    # evaluation method: receiver operating characteristic
    evaluation_criterium = ROCEvaluation()

    # cross-validation instance
    cross_validation = CrossValidation(svm, feats_train, labels,
                                       splitting_strategy, evaluation_criterium)
    print("completed cross validation creation")
    cross_validation.set_autolock(False)

    # run the cross-validation once (raise this to repeat it several times)
    cross_validation.set_num_runs(1)

    # (optional) request 95% confidence intervals for results (not actually
    # needed for this toy example)
    #cross_validation.set_conf_int_alpha(0.05)

    # perform cross-validation and print results
    print("completed svm initialization")
    result = cross_validation.evaluate()
    print("completed crossvalidation")
    cvr = CrossValidationResult.obtain_from_generic(result)
    print("mean:", cvr.mean)

    # The criterion is a ROCEvaluation, so it has to be queried through the
    # ROC accessors; the PRC accessors belong to PRCEvaluation.
    # NOTE(review): these presumably reflect the most recently evaluated fold
    # rather than an average over folds -- confirm against the Shogun docs.
    print("auROC:", evaluation_criterium.get_auROC())
    print(evaluation_criterium.get_ROC())


if __name__ == '__main__':
    print('SVMLight')
    classifier_svmlight_modular(*parameter_list[0])