% Conference paper from PRIB 2008, published in the LNCS series.
% `address` holds the publisher's location (not the conference venue);
% the conference dates are kept in `note`.
@inproceedings{ae0248bf352e4a2a9a7e49e0e0aab58f,
  author    = {Anand, Ashish and Fogel, Gary B. and Pugalenthi, Ganesan and Suganthan, P. N.},
  title     = {Prediction of transcription factor families using {DNA} sequence features},
  booktitle = {3rd {IAPR} International Conference on Pattern Recognition in Bioinformatics, {PRIB} 2008},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  pages     = {154--164},
  isbn      = {3540884343},
  doi       = {10.1007/978-3-540-88436-1_14},
  language  = {English (US)},
  keywords  = {Multi-class classification, Transcription factor family prediction},
  abstract  = {Understanding the mechanisms of protein-DNA interaction is of critical importance in biology. Transcription factor (TF) binding to a specific DNA sequence depends on at least two factors: A protein-level DNA-binding domain and a nucleotide-level specific sequence serving as a TF binding site. TFs have been classified into families based on these factors. TFs within each family bind to specific nucleotide sequences in a very similar fashion. Identification of the TF family that might bind at a particular nucleotide sequence requires a machine learning approach. Here we considered two sets of features based on DNA sequences and their physicochemical properties and applied a one-versus-all SVM (OVA-SVM) with class-wise optimized features to identify TF family-specific features in DNA sequences. Using this approach, a mean prediction accuracy of $\sim$80\% was achieved, which represents an improvement of $\sim$7\% over previous approaches on the same data.},
  note      = {Conference date: 15--17 October 2008},
}