Hu, J. Edward; Rudinger, Rachel; Post, Matt; Van Durme, Benjamin
PARABANK: Monolingual Bitext Generation and Sentential Paraphrasing via Lexically-constrained Neural Machine Translation Journal Article
In: 2019.
@inproceedings{Hu2019,
  author    = {Hu, J. Edward and Rudinger, Rachel and Post, Matt and Van Durme, Benjamin},
  title     = {{PARABANK}: Monolingual Bitext Generation and Sentential Paraphrasing via Lexically-constrained Neural Machine Translation},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/2YXOZE-hu-etal-2019-parabank.pdf},
  year      = {2019},
  date      = {2019-01-27},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Kothinti, Sandeep; Imoto, Keisuke; Chakrabarty, Debmalya; Sell, Gregory; Watanabe, Shinji; Elhilali, Mounya
JOINT ACOUSTIC AND CLASS INFERENCE FOR WEAKLY SUPERVISED SOUND EVENT DETECTION Conference
2018.
@inproceedings{Kothinti2018,
  author    = {Sandeep Kothinti and Keisuke Imoto and Debmalya Chakrabarty and Gregory Sell and Shinji Watanabe and Mounya Elhilali},
  title     = {Joint Acoustic and Class Inference for Weakly Supervised Sound Event Detection},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/Kothinti__Imoto__Chakrabaarty__Sell__Watanable__Elhilali_-_Joint_Acoustic_and_Class_Inference_for_Weakly_Supervised_Sound_Event_Detection.pdf},
  year      = {2018},
  date      = {2018-11-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Sell, Gregory; Snyder, David; McCree, Alan; Garcia-Romero, Daniel; Villalba, Jesus; Maciejewski, Matthew; Manohar, Vimal; Dehak, Najim; Povey, Daniel; Watanabe, Shinji; Khudanpur, Sanjeev
Experiences and Lessons Learned for the JHU Team in the Inaugural DIHARD Challenge Conference
2018.
@inproceedings{Sell2018,
  author    = {Gregory Sell and David Snyder and Alan McCree and Daniel Garcia-Romero and Jesus Villalba and Matthew Maciejewski and Vimal Manohar and Najim Dehak and Daniel Povey and Shinji Watanabe and Sanjeev Khudanpur},
  title     = {Experiences and Lessons Learned for the {JHU} Team in the Inaugural {DIHARD} Challenge},
  booktitle = {Proceedings of Interspeech},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/IS18_dihard.pdf},
  year      = {2018},
  date      = {2018-09-02},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Lippincott, Thomas
Portable, layer-wise task performance monitoring for NLP models Proceedings
2018.
@inproceedings{Lippincott2018,
  author    = {Thomas Lippincott},
  title     = {Portable, layer-wise task performance monitoring for {NLP} models},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/vivisect.pdf},
  year      = {2018},
  date      = {2018-09-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lippincott, Tom; Carrell, Annabelle
Observational Comparison of Geo-tagged and Randomly-drawn Tweets Journal Article
In: 2018.
@article{Lippincott2018b,
  author    = {Tom Lippincott and Annabelle Carrell},
  title     = {Observational Comparison of Geo-tagged and Randomly-drawn Tweets},
  year      = {2018},
  date      = {2018-06-01},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/geo.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Sell, Gregory; Duh, Kevin; Snyder, David; Etter, Dave; Garcia-Romero, Daniel
Audio-Visual Person Recognition in Multimedia Data from the IARPA Janus Program Proceedings
2018.
@inproceedings{Sell2018b,
  author    = {Gregory Sell and Kevin Duh and David Snyder and Dave Etter and Daniel Garcia-Romero},
  title     = {Audio-Visual Person Recognition in Multimedia Data from the {IARPA} {Janus} Program},
  booktitle = {International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/ICASSP18_Janus.pdf},
  year      = {2018},
  date      = {2018-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Khayrallah, Huda; Kumar, Gaurav; Duh, Kevin; Post, Matt; Koehn, Philipp
Neural Lattice Rescoring for Phrase-based Machine Translation Conference
2017.
@inproceedings{Khayrallah2017,
  author    = {Huda Khayrallah and Gaurav Kumar and Kevin Duh and Matt Post and Philipp Koehn},
  title     = {Neural Lattice Rescoring for Phrase-based Machine Translation},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/I17-2004.pdf},
  year      = {2017},
  date      = {2017-05-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
Lippincott, Tom; Van Durme, Benjamin
Fluency detection on communication networks Proceedings
2016.
@inproceedings{Lippincott2016,
  author    = {Lippincott, Tom and Van Durme, Benjamin},
  title     = {Fluency detection on communication networks},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2020/04/fluency.pdf},
  year      = {2016},
  date      = {2016-11-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Yung, Frances; Duh, Kevin; Komura, Taku; Matsumoto, Yuji
Modelling the Interpretation of Discourse Connectives by Bayesian Pragmatics Proceedings Article
In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 531–536, Association for Computational Linguistics, Berlin, Germany, 2016.
@inproceedings{yung16connectives,
  author    = {Yung, Frances and Duh, Kevin and Komura, Taku and Matsumoto, Yuji},
  title     = {Modelling the Interpretation of Discourse Connectives by {Bayesian} Pragmatics},
  booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  pages     = {531--536},
  publisher = {Association for Computational Linguistics},
  address   = {Berlin, Germany},
  year      = {2016},
  date      = {2016-08-01},
  url       = {http://anthology.aclweb.org/P16-2086},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ding, Shuoyang; Duh, Kevin; Khayrallah, Huda; Koehn, Philipp; Post, Matt
The JHU Machine Translation Systems for WMT 2016 Proceedings Article
In: Proceedings of the First Conference on Machine Translation, pp. 272–280, Association for Computational Linguistics, Berlin, Germany, 2016.
@inproceedings{ding16wmt,
  author    = {Ding, Shuoyang and Duh, Kevin and Khayrallah, Huda and Koehn, Philipp and Post, Matt},
  title     = {The {JHU} Machine Translation Systems for {WMT} 2016},
  booktitle = {Proceedings of the First Conference on Machine Translation},
  pages     = {272--280},
  publisher = {Association for Computational Linguistics},
  address   = {Berlin, Germany},
  year      = {2016},
  date      = {2016-08-01},
  url       = {http://www.aclweb.org/anthology/W16-2310},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Yung, Frances; Duh, Kevin; Komura, Taku; Matsumoto, Yuji
Modelling the Usage of Discourse Connectives as Rational Speech Acts Proceedings Article
In: Proceedings of The 20th SIGNLL Conference on Computational Natural Language Learning, pp. 302–313, Association for Computational Linguistics, Berlin, Germany, 2016.
@inproceedings{yung16rational,
  author    = {Yung, Frances and Duh, Kevin and Komura, Taku and Matsumoto, Yuji},
  title     = {Modelling the Usage of Discourse Connectives as Rational Speech Acts},
  booktitle = {Proceedings of The 20th SIGNLL Conference on Computational Natural Language Learning},
  pages     = {302--313},
  publisher = {Association for Computational Linguistics},
  address   = {Berlin, Germany},
  year      = {2016},
  date      = {2016-08-01},
  url       = {http://www.aclweb.org/anthology/K16-1030},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
McCree, Alan; Sell, Gregory; Garcia-Romero, Daniel
Augmented Data Training of Joint Acoustic/Phonotactic DNN i-vectors for NIST LRE15 Proceedings Article
In: Proceedings of Odyssey, 2016.
@inproceedings{McCree_Sell_Garcia-Romero_2016A,
  author    = {Alan McCree and Gregory Sell and Daniel Garcia-Romero},
  title     = {Augmented Data Training of Joint Acoustic/Phonotactic {DNN} i-vectors for {NIST} {LRE15}},
  booktitle = {Proceedings of Odyssey},
  year      = {2016},
  date      = {2016-01-01},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2016/10/McCree_Sell_Garcia-Romero_2016A.pdf},
  abstract  = {This paper presents the JHU HLTCOE submission to the NIST 2015 Language Recognition Evaluation, including critical and novel algorithmic components, use of limited and augmented training data, and additional post-evaluation analysis and improvements. All of our systems used i-vectors based on Deep Neural Networks (DNNs) with discriminatively-trained Gaussian classifiers, and linear fusion was performed with duration-dependent scaling. A key innovation was the use of three different kinds of i-vectors: acoustic, phonotactic, and joint. In addition, data augmentation was used to overcome the limited training data of this evaluation. Post-evaluation analysis shows the benefits of these design decisions as well as further potential improvements.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Tanaka, Tomohiro; Moriya, Takafumi; Shinozaki, Takahiro; Watanabe, Shinji; Hori, Takaaki; Duh, Kevin
Automated structure discovery and parameter tuning of neural network language model based on evolution strategy Proceedings Article
In: Proceedings of the 2016 IEEE Workshop on Spoken Language Technology, 2016.
@inproceedings{tanaka16evolution,
  author    = {Tomohiro Tanaka and Takafumi Moriya and Takahiro Shinozaki and Shinji Watanabe and Takaaki Hori and Kevin Duh},
  title     = {Automated structure discovery and parameter tuning of neural network language model based on evolution strategy},
  booktitle = {Proceedings of the 2016 IEEE Workshop on Spoken Language Technology},
  year      = {2016},
  date      = {2016-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Sell, Gregory; McCree, Alan; Garcia-Romero, Daniel
Priors for Speaker Counting and Diarization with AHC Proceedings Article
In: Proceedings of Interspeech, 2016.
@inproceedings{Sell_McCree_Garcia-Romero_2016A,
  author    = {Gregory Sell and Alan McCree and Daniel Garcia-Romero},
  title     = {Priors for Speaker Counting and Diarization with {AHC}},
  booktitle = {Proceedings of Interspeech},
  year      = {2016},
  date      = {2016-01-01},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2016/10/Sell_McCree_Garcia-Romero_2016A.pdf},
  abstract  = {Estimating the number of speakers in an audio segment is a necessary step in the process of speaker diarization, but current diarization algorithms do not explicitly define a prior probability on this estimation. This work proposes a process for including priors in speaker diarization with agglomerative hierarchical clustering (AHC). It is also shown that the exclusion of a prior with AHC is itself implicitly a prior, which is found to be geometric growth in the number of speakers. By using more sensible priors, we are able to demonstrate significantly improved robustness to calibration error for speaker counting and speaker diarization.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Zheng, D.; Mhembere, D.; Lyzinski, V.; Vogelstein, J.; Priebe, C. E.; Burns, R.
Semi-External Memory Sparse Matrix Multiplication on Billion-node Graphs in a Multicore Architecture Journal Article
In: IEEE Transactions on Parallel and Distributed Systems, accepted for publication, 2016.
@article{zheng2016semi,
  author    = {Zheng, D. and Mhembere, D. and Lyzinski, V. and Vogelstein, J. and Priebe, C. E. and Burns, R.},
  title     = {Semi-External Memory Sparse Matrix Multiplication on Billion-node Graphs in a Multicore Architecture},
  journal   = {IEEE Transactions on Parallel and Distributed Systems},
  note      = {Accepted for publication},
  year      = {2016},
  date      = {2016-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Chen, L.; Vogelstein, J. T.; Lyzinski, V.; Priebe, C. E.
A joint graph inference case study: the c. elegans chemical and electrical connectomes Proceedings Article
In: Worm, Taylor & Francis 2016.
@article{chen2016joint,
  author    = {L. Chen and J. T. Vogelstein and V. Lyzinski and C. E. Priebe},
  title     = {A joint graph inference case study: the {C. elegans} chemical and electrical connectomes},
  journal   = {Worm},
  volume    = {5},
  number    = {2},
  publisher = {Taylor \& Francis},
  year      = {2016},
  date      = {2016-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Lyzinski, Vince; Tang, Minh; Athreya, Avanti; Park, Youngser; Priebe, Carey E
Community Detection and Classification in Hierarchical Stochastic Blockmodels Journal Article
In: IEEE Transactions on Network Science and Engineering, accepted for publication, 2016.
@article{lyzinski2015community,
  author    = {Lyzinski, Vince and Tang, Minh and Athreya, Avanti and Park, Youngser and Priebe, Carey E},
  title     = {Community Detection and Classification in Hierarchical Stochastic Blockmodels},
  journal   = {IEEE Transactions on Network Science and Engineering},
  note      = {Accepted for publication},
  year      = {2016},
  date      = {2016-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Lyzinski, V.
Information Recovery in Shuffled Graphs via Graph Matching Journal Article
In: arXiv preprint arXiv:1605.02315, 2016.
@article{lyzinski2016information,
  author     = {Lyzinski, V.},
  title      = {Information Recovery in Shuffled Graphs via Graph Matching},
  journal    = {arXiv preprint arXiv:1605.02315},
  eprint     = {1605.02315},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/1605.02315},
  year       = {2016},
  date       = {2016-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Tang, M.; Athreya, A.; Sussman, D. L.; Lyzinski, V.; Park, Y.; Priebe, C. E.
A semiparametric two-sample hypothesis testing problem for random dot product graphs Journal Article
In: Journal of Computational and Graphical Statistics, accepted for publication, 2016.
@article{MT2,
  author    = {M. Tang and A. Athreya and D. L. Sussman and V. Lyzinski and Y. Park and C. E. Priebe},
  title     = {A semiparametric two-sample hypothesis testing problem for random dot product graphs},
  journal   = {Journal of Computational and Graphical Statistics},
  note      = {Accepted for publication},
  year      = {2016},
  date      = {2016-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Tsubaki, Masashi; Duh, Kevin; Shimbo, Masashi; Matsumoto, Yuji
Non-Linear Similarity Learning for Compositionality Proceedings Article
In: AAAI Conference on Artificial Intelligence, 2016.
@inproceedings{tsubaki16nonlinear,
  author    = {Masashi Tsubaki and Kevin Duh and Masashi Shimbo and Yuji Matsumoto},
  title     = {Non-Linear Similarity Learning for Compositionality},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2016},
  date      = {2016-01-01},
  url       = {http://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/view/12373},
  abstract  = {Many NLP applications rely on the existence of similarity measures over text data. Although word vector space models provide good similarity measures between words, phrasal and sentential similarities derived from composition of individual words remain as a difficult problem. In this paper, we propose a new method of non-linear similarity learning for semantic compositionality. In this method, word representations are learned through the similarity learning of sentences in a high-dimensional space with kernel functions. On the task of predicting the semantic similarity of two sentences (SemEval 2014, Task 1), our method outperforms linear baselines, feature engineering approaches, recursive neural networks, and achieve competitive results with long short-term memory models.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Coppersmith, Glen; Dredze, Mark; Harman, Craig; Hollingshead, Kristy; Mitchell, Margaret
CLPsych 2015 Shared Task: Depression and PTSD on Twitter Proceedings Article
In: NAACL Workshop on Computational Linguistics and Clinical Psychology, 2015.
@inproceedings{Coppersmith:2015eu,
  author    = {Coppersmith, Glen and Dredze, Mark and Harman, Craig and Hollingshead, Kristy and Mitchell, Margaret},
  title     = {{CLPsych} 2015 Shared Task: Depression and {PTSD} on {Twitter}},
  booktitle = {NAACL Workshop on Computational Linguistics and Clinical Psychology},
  year      = {2015},
  date      = {2015-01-01},
  abstract  = {This paper presents a summary of the Computational Linguistics and Clinical Psychology (CLPsych) 2015 shared and unshared tasks. These tasks aimed to provide apples-to-apples comparisons of various approaches to modeling language relevant to mental health from social media. The data used for these tasks is from Twitter users who state a diagnosis of depression or post traumatic stress disorder (PTSD) and demographically-matched community controls. The unshared task was a hackathon held at Johns Hopkins University in November 2014 to explore the data, and the shared task was conducted remotely, with each participating team submitted scores for a held-back test set of users. The shared task consisted of three binary classification experiments: (1) depression versus control, (2) PTSD versus control, and (3) depression versus PTSD. Classifiers were compared primarily via their average precision, though a number of other metrics are used along with this to allow a more nuanced interpretation of the performance measures.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lyzinski, Vince; Sell, Gregory; Jansen, Aren
An Evaluation of Graph Clustering Methods for Unsupervised Term Discovery Proceedings Article
In: Proceedings of Interspeech, 2015.
@inproceedings{Lyzinski_Sell_Jansen_2015A,
  author    = {Vince Lyzinski and Gregory Sell and Aren Jansen},
  title     = {An Evaluation of Graph Clustering Methods for Unsupervised Term Discovery},
  booktitle = {Proceedings of Interspeech},
  year      = {2015},
  date      = {2015-01-01},
  url       = {https://hltcoe.jhu.edu/wp-content/uploads/2016/10/Lyzinski_Sell_Jansen_2015A.pdf},
  abstract  = {Unsupervised term discovery (UTD) is the task of automatically identifying the repeated words and phrases in a collection of speech audio without relying on any language-specific resources. While the solution space for the task is far from fully explored, the dominant approach to date decomposes the discovery problem into two steps, where (i) segmental dynamic time warping is used to search the speech audio for repeated acoustic patterns, and (ii) these individual repetitions are partitioned into word/phrase categories using graph clustering. In this paper, we perform an unprecedented evaluation of a wide range of advanced graph clustering methods for the UTD task. We conduct our study in the evaluation framework of the Zero Resource Speech Challenge. We find that, for a range of features and languages, modularity-based clustering improves UTD performance most consistently, often by a wide margin. When paired with out-of-language deep neural net bottleneck features, we find performance near that of a high-resource UTD system.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Paul, Michael; Dredze, Mark
SPRITE: Generalizing Topic Models with Structured Priors Journal Article
In: 2015.
@article{Paul:2015sf,
  author    = {Paul, Michael and Dredze, Mark},
  title     = {{SPRITE}: Generalizing Topic Models with Structured Priors},
  journal   = {Transactions of the Association for Computational Linguistics},
  year      = {2015},
  date      = {2015-01-01},
  url       = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/403/106},
  abstract  = {We introduce SPRITE, a family of topic models that incorporates structure into model priors as a function of underlying components. The structured priors can be constrained to model topic hierarchies, factorizations, correlations, and supervision, allowing SPRITE to be tailored to particular settings. We demonstrate this flexibility by constructing a SPRITE-based model to jointly infer topic hierarchies and author perspective, which we apply to corpora of political debates and online reviews. We show that the model learns intuitive topics, outperforming several other topic models at predictive tasks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Volkova, Svitlana; Yarowsky, David
Improving Gender Prediction of Social Media Users via Weighted Annotator Rationales Proceedings Article
In: NIPS 2014 Workshop on Personalization: Methods and Applications, Montreal, Canada, 2014.
@inproceedings{volkova-yarowsky:2014,
  author    = {Volkova, Svitlana and Yarowsky, David},
  title     = {Improving Gender Prediction of Social Media Users via Weighted Annotator Rationales},
  booktitle = {NIPS 2014 Workshop on Personalization: Methods and Applications},
  address   = {Montreal, Canada},
  year      = {2014},
  date      = {2014-12-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Post, Matt; Lopez, Adam
The Machine Translation Leaderboard Journal Article
In: The Prague Bulletin of Mathematical Linguistics, vol. 102, no. 1, pp. 37–46, 2014.
@article{post2014machineb,
  author    = {Post, Matt and Lopez, Adam},
  title     = {The Machine Translation Leaderboard},
  journal   = {The Prague Bulletin of Mathematical Linguistics},
  volume    = {102},
  number    = {1},
  pages     = {37--46},
  year      = {2014},
  date      = {2014-10-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Bojar, Ondrej; Buck, Christian; Federmann, Christian; Haddow, Barry; Koehn, Philipp; Leveling, Johannes; Monz, Christof; Pecina, Pavel; Post, Matt; Saint-Amand, Herve; Soricut, Radu; Specia, Lucia; Tamchyna, Aleš
Findings of the 2014 Workshop on Statistical Machine Translation Proceedings Article
In: Proceedings of the Ninth Workshop on Statistical Machine Translation, pp. 12–58, Association for Computational Linguistics, Baltimore, Maryland, USA, 2014.
@inproceedings{bojar-EtAl:2014:W14-33b,
  author    = {Bojar, Ondrej and Buck, Christian and Federmann, Christian and Haddow, Barry and Koehn, Philipp and Leveling, Johannes and Monz, Christof and Pecina, Pavel and Post, Matt and Saint-Amand, Herve and Soricut, Radu and Specia, Lucia and Tamchyna, Aleš},
  title     = {Findings of the 2014 Workshop on Statistical Machine Translation},
  booktitle = {Proceedings of the Ninth Workshop on Statistical Machine Translation},
  pages     = {12--58},
  publisher = {Association for Computational Linguistics},
  address   = {Baltimore, Maryland, USA},
  url       = {http://aclweb.org/anthology/W/W14/W14-3302.pdf},
  year      = {2014},
  date      = {2014-06-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Sakaguchi, Keisuke; Post, Matt; Van Durme, Benjamin
Efficient Elicitation of Annotations for Human Evaluation of Machine Translation Proceedings Article
In: Proceedings of the Workshop on Statistical Machine Translation, Association for Computational Linguistics, Baltimore, Maryland, 2014.
@inproceedings{sakaguchi2014efficientb,
  author    = {Sakaguchi, Keisuke and Post, Matt and Van Durme, Benjamin},
  title     = {Efficient Elicitation of Annotations for Human Evaluation of Machine Translation},
  booktitle = {Proceedings of the Workshop on Statistical Machine Translation},
  publisher = {Association for Computational Linguistics},
  address   = {Baltimore, Maryland},
  year      = {2014},
  date      = {2014-06-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Bojar, Ondrej; Buck, Christian; Federmann, Christian; Haddow, Barry; Koehn, Philipp; Leveling, Johannes; Monz, Christof; Pecina, Pavel; Post, Matt; Saint-Amand, Herve; Soricut, Radu; Specia, Lucia; Tamchyna, Aleš
Findings of the 2014 Workshop on Statistical Machine Translation Proceedings Article
In: Proceedings of the Ninth Workshop on Statistical Machine Translation, pp. 12–58, Association for Computational Linguistics, Baltimore, Maryland, USA, 2014.
@inproceedings{bojar-EtAl:2014:W14-33,
  author    = {Bojar, Ondrej and Buck, Christian and Federmann, Christian and Haddow, Barry and Koehn, Philipp and Leveling, Johannes and Monz, Christof and Pecina, Pavel and Post, Matt and Saint-Amand, Herve and Soricut, Radu and Specia, Lucia and Tamchyna, Aleš},
  title     = {Findings of the 2014 Workshop on Statistical Machine Translation},
  booktitle = {Proceedings of the Ninth Workshop on Statistical Machine Translation},
  pages     = {12--58},
  publisher = {Association for Computational Linguistics},
  address   = {Baltimore, Maryland, USA},
  url       = {http://aclweb.org/anthology/W/W14/W14-3302.pdf},
  year      = {2014},
  date      = {2014-06-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Garcia-Romero, Daniel; McCree, Alan; Shum, Stephen; Brummer, Niko; Vaquero, Carlos
UNSUPERVISED DOMAIN ADAPTATION FOR I-VECTOR SPEAKER RECOGNITION
2014.
@inproceedings{Romero:McCree:2014,
  author    = {Garcia-Romero, Daniel and McCree, Alan and Shum, Stephen and Brummer, Niko and Vaquero, Carlos},
  title     = {Unsupervised Domain Adaptation for i-vector Speaker Recognition},
  booktitle = {Odyssey},
  year      = {2014},
  date      = {2014-06-01},
  abstract  = {In this paper, we present a framework for unsupervised domain adaptation of PLDA based i-vector speaker recognition systems. Given an existing out-of-domain PLDA system, we use it to cluster unlabeled
in-domain data, and then use this data to adapt the parameters of the PLDA system. We explore two versions of agglomerative hierarchical clustering that use the PLDA system. We also study two
automatic ways to determine the number of clusters in the in-domain dataset. The proposed techniques are experimentally validated in the recently introduced domain adaptation challenge. This challenge
provides a very useful setup to explore domain adaptation since it illustrates a significant performance gap between an in-domain and out-of-domain system. Using agglomerative hierarchical clustering with a stopping criterion based on unsupervised calibration we are able to recover 85% of this gap.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ganapathy, Sriram; Harish, Sri; Hermansky, Hynek
Robust Feature Extraction Using Modulation Filtering of Autoregressive Models
2014.
@article{ganapathy2014robust,
  author    = {Ganapathy, Sriram and Harish, Sri and Hermansky, Hynek},
  title     = {Robust Feature Extraction Using Modulation Filtering of Autoregressive Models},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  publisher = {IEEE},
  year      = {2014},
  date      = {2014-06-01},
  url       = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=&arnumber=6826560&queryText%3DRobust+Feature+Extraction+Using+Modulation+Filtering+of+Autoregressive+models},
  abstract  = {Speaker and language recognition in noisy and degraded channel conditions continue to be a challenging problem mainly due to the mismatch between clean training and noisy test conditions. In the presence of noise, the most reliable portions of the signal are the high energy regions which can be used for robust feature extraction. In this paper, we propose a front end processing scheme based on autoregressive (AR) models that represent the high energy regions with good accuracy followed by a modulation filtering process. The AR model of the spectrogram is derived using two separable time and frequency AR transforms. The first AR model (temporal AR model) of the sub-band Hilbert envelopes is derived using frequency domain linear prediction (FDLP). This is followed by a spectral AR model applied on the FDLP envelopes. The output 2-D AR model represents a low-pass modulation filtered spectrogram of the speech signal. The band-pass modulation filtered spectrograms can further be derived by dividing two AR models with different model orders (cut-off frequencies). The modulation filtered spectrograms are converted to cepstral coefficients and are used for a speaker recognition task in noisy and reverberant conditions. Various speaker recognition experiments are performed with clean and noisy versions of the NIST-2010 speaker recognition evaluation (SRE) database using the state-of-the-art speaker recognition system. In these experiments, the proposed front-end analysis provides substantial improvements (relative improvements of up to 25%) compared to baseline techniques. Furthermore, we also illustrate the generalizability of the proposed methods using language identification (LID) experiments on highly degraded high-frequency (HF) radio channels and speech recognition experiments on noisy data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Gormley, Matt; Mitchell, Margaret; Van Durme, Benjamin; Dredze, Mark
Low-Resource Semantic Role Labeling Proceedings Article
In: Association for Computational Linguistics (ACL), 2014.
@inproceedings{gormley-etal:2014:SRL,
  author    = {Gormley, Matt and Mitchell, Margaret and Van Durme, Benjamin and Dredze, Mark},
  title     = {Low-Resource Semantic Role Labeling},
  booktitle = {Association for Computational Linguistics (ACL)},
  year      = {2014},
  date      = {2014-06-01},
  url       = {http://www.cs.jhu.edu/~mrg/publications/srl-acl-2014.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
McCree, Alan
Multiclass Discriminative Training of i-vector Language Recognition Proceedings Article
In: Odyssey, 2014.
@inproceedings{b,
  author    = {McCree, Alan},
  title     = {Multiclass Discriminative Training of i-vector Language Recognition},
  booktitle = {Odyssey},
  year      = {2014},
  date      = {2014-05-01},
  abstract  = {The current state-of-the-art for acoustic language
recognition is an i-vector classifier followed by a
discriminatively-trained multiclass back-end. This paper
presents a unified approach, where a Gaussian i-vector
classifier is trained using Maximum Mutual Information
(MMI) to directly optimize the multiclass calibration criterion,
so that no separate back-end is needed. The system
is extended to the open set task by training an additional
Gaussian model. Results on the NIST LRE11
standard evaluation task confirm that high performance
is maintained with this new single-stage approach.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
McCree, Alan; Garcia-Romero, Daniel
SUPERVISED DOMAIN ADAPTATION FOR I-VECTOR BASED SPEAKER RECOGNITION
2014.
@inproceedings{mccree2014supervised,
  author      = {McCree, Alan and Garcia-Romero, Daniel},
  title       = {Supervised Domain Adaptation for i-vector Based Speaker Recognition},
  booktitle   = {International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  pages       = {4047--4051},
  publisher   = {IEEE},
  institution = {Human Language Technology Center of Excellence, Johns Hopkins University},
  year        = {2014},
  date        = {2014-05-01},
  abstract    = {In this paper, we present a comprehensive study on supervised domain adaptation of PLDA based i-vector speaker recognition systems. After describing the system parameters subject to adaptation, we study the impact of their adaptation on recognition performance. Using the recently designed domain adaptation challenge, we observe that the adaptation of the PLDA parameters (i.e. across-class and within-class co variances) produces the largest gains. Nonetheless, length-normalization is also important; whereas using an indomani UBM and T matrix is not crucial. For the PLDA adaptation, we compare four approaches. Three of them are proposed in this work, and a fourth one was previously published. Overall, the four techniques are successful at leveraging varying amounts of labeled in-domain data and their performance is quite similar. However, our approaches are less involved, and two of them are applicable to a larger class of models (low-rank across-class).},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {inproceedings}
}
McCree, Alan; Garcia-Romero, Daniel; Zhang, Xiaohui; Povey, Daniel
IMPROVING SPEAKER RECOGNITION PERFORMANCE IN THE DOMAIN ADAPTATION CHALLENGE USING DEEP NEURAL NETWORKS Proceedings Article
In: Proceedings of IEEE Spoken Language Technology Workshop, 2014.
@inproceedings{McCree:2014,
  author      = {McCree, Alan and Garcia-Romero, Daniel and Zhang, Xiaohui and Povey, Daniel},
  title       = {Improving Speaker Recognition Performance in the Domain Adaptation Challenge Using Deep Neural Networks},
  booktitle   = {Proceedings of IEEE Spoken Language Technology Workshop},
  institution = {Human Language Technology Center of Excellence \& Center for Language and Speech Processing The Johns Hopkins University},
  year        = {2014},
  date        = {2014-05-01},
  abstract    = {Traditional i-vector speaker recognition systems use a Gaussian mixture
model (GMM) to collect sufficient statistics (SS). Recently, replacing
this GMM with a deep neural network (DNN) has shown
promising results. In this paper, we explore the use of DNNs to
collect SS for the unsupervised domain adaptation task of the Domain
Adaptation Challenge (DAC). We show that collecting SS with
a DNN trained on out-of-domain data boosts the speaker recognition
performance of an out-of-domain system by more than 25%. Moreover,
we integrate the DNN in an unsupervised adaptation framework,
that uses agglomerative hierarchical clustering with a stopping
criterion based on unsupervised calibration, and show that the initial
gains of the out-of-domain system carry over to the final adapted system.
Despite the fact that the DNN is trained on the out-of-domain
data, the final adapted system produces a relative improvement of
more than 30% with respect to the best published results on this task.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {inproceedings}
}
Kumar, Gaurav; Post, Matt; Povey, Daniel; Khudanpur, Sanjeev
Some Insights From Translating Conversational Telephone Speech Proceedings Article
In: International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Florence, Italy, 2014.
@inproceedings{kumar2014some,
  title     = {Some Insights From Translating Conversational Telephone Speech},
  author    = {Kumar, Gaurav and Post, Matt and Povey, Daniel and Khudanpur, Sanjeev},
  url       = {http://cs.jhu.edu/~post/papers/kumar2013some.pdf},
  year      = {2014},
  date      = {2014-05-01},
  booktitle = {International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  address   = {Florence, Italy},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Drexler, Jennifer; Rastogi, Pushpendre; Aguilar, Jacqueline; Van Durme, Benjamin; Post, Matt
A Wikipedia-based Corpus for Contextualized Machine Translation Proceedings Article
In: Calzolari, Nicoletta; Choukri, Khalid; Declerck, Thierry; Loftsson, Hrafn; Maegaard, Bente; Mariani, Joseph; Moreno, Asuncion; Odijk, Jan; Piperidis, Stelios (Ed.): Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pp. 3593–3596, European Language Resources Association (ELRA), Reykjavik, Iceland, 2014.
@inproceedings{DREXLER14.1217.L14-1150,
  title     = {A Wikipedia-based Corpus for Contextualized Machine Translation},
  author    = {Drexler, Jennifer and Rastogi, Pushpendre and Aguilar, Jacqueline and Van Durme, Benjamin and Post, Matt},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  year      = {2014},
  date      = {2014-05-01},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  pages     = {3593--3596},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Reykjavik, Iceland},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Kumar, Gaurav; Post, Matt; Povey, Daniel; Khudanpur, Sanjeev
Some Insights From Translating Conversational Telephone Speech Proceedings Article
In: ICASSP 2014, Florence, Italy, 2014.
% Duplicate of kumar2014some (same paper); kept because this key may still be cited.
@inproceedings{kumar2014someb,
  title     = {Some Insights From Translating Conversational Telephone Speech},
  author    = {Kumar, Gaurav and Post, Matt and Povey, Daniel and Khudanpur, Sanjeev},
  url       = {http://cs.jhu.edu/~post/papers/kumar2013some.pdf},
  year      = {2014},
  date      = {2014-05-01},
  booktitle = {International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  address   = {Florence, Italy},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pavlick, Ellie; Post, Matt; Irvine, Ann; Kachaev, Dmitry; Callison-Burch, Chris
The Language Demographics of Amazon Mechanical Turk Journal Article
In: Transactions of the Association for Computational Linguistics, vol. 2, pp. 79–92, 2014.
@article{pavlick2014languageb,
  title     = {The Language Demographics of Amazon Mechanical Turk},
  author    = {Pavlick, Ellie and Post, Matt and Irvine, Ann and Kachaev, Dmitry and Callison-Burch, Chris},
  year      = {2014},
  date      = {2014-02-01},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {2},
  pages     = {79--92},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Althouse, Ben; Allem, Jon-Patrick; Childers, Matt; Dredze, Mark; Ayers, John
Population Health Concerns During the United States' Great Recession Journal Article
In: pp. 166–170, 2014.
@article{Althouse:2014lr,
  title     = {Population Health Concerns During the United States' Great Recession},
  author    = {Althouse, Ben and Allem, Jon-Patrick and Childers, Matt and Dredze, Mark and Ayers, John},
  url       = {http://www.ajpmonline.org/article/S0749-3797(13)00581-3/abstract},
  year      = {2014},
  date      = {2014-02-01},
  journal   = {American Journal of Preventive Medicine},
  volume    = {46},
  number    = {2},
  pages     = {166--170},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Pavlick, Ellie; Post, Matt; Irvine, Ann; Kachaev, Dmitry; Callison-Burch, Chris
The Language Demographics of Amazon Mechanical Turk Journal Article
In: pp. 79–92, 2014.
% Duplicate of pavlick2014languageb (same paper); kept because this key may still be cited.
@article{pavlick2014language,
  title     = {The Language Demographics of Amazon Mechanical Turk},
  author    = {Pavlick, Ellie and Post, Matt and Irvine, Ann and Kachaev, Dmitry and Callison-Burch, Chris},
  url       = {http://www.cis.upenn.edu/~ccb/publications/language-demographics-of-mechanical-turk.pdf},
  year      = {2014},
  date      = {2014-02-01},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {2},
  pages     = {79--92},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Rudinger, Rachel; Van Durme, Benjamin
Is the Stanford Dependency Representation Semantic? Proceedings Article
In: Association for Computational Linguistics (ACL), Workshop on EVENTS, 2014.
@inproceedings{RudingerVanDurmeACL14,
  title     = {Is the Stanford Dependency Representation Semantic?},
  author    = {Rudinger, Rachel and Van Durme, Benjamin},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL), Workshop on EVENTS},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ferraro, Francis; Thomas, Max; Gormley, Matt; Wolfe, Travis; Harman, Craig; Van Durme, Benjamin
Concretely Annotated Corpora Proceedings Article
In: 4th Workshop on Automated Knowledge Base Construction (AKBC), 2014.
@inproceedings{concretely-annotated-2014,
  title     = {Concretely Annotated Corpora},
  author    = {Ferraro, Francis and Thomas, Max and Gormley, Matt and Wolfe, Travis and Harman, Craig and Van Durme, Benjamin},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {4th Workshop on Automated Knowledge Base Construction (AKBC)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Osborne, Miles; Dredze, Mark
Facebook, Twitter and Google Plus for Breaking News: Is there a winner? Proceedings Article
In: International Conference on Weblogs and Social Media (ICWSM), 2014.
@inproceedings{Osborne:2014fk,
  title     = {Facebook, Twitter and Google Plus for Breaking News: Is there a winner?},
  author    = {Osborne, Miles and Dredze, Mark},
  url       = {http://www.aaai.org/ocs/index.php/ICWSM/ICWSM14/paper/view/8072},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {International Conference on Weblogs and Social Media (ICWSM)},
  abstract  = {Twitter is widely seen as being the go to place for breaking news. Recently however, competing Social Media have begun to carry news. Here we examine how Facebook, Google Plus and Twitter report on breaking news. We consider coverage (whether news events are reported) and latency (the time when they are reported). Using data drawn from three weeks in December 2013, we identify 29 major news events, ranging from celebrity deaths, plague outbreaks to sports events. We find that all media carry the same major events, but Twitter continues to be the preferred medium for breaking news, almost consistently leading Facebook or Google Plus. Facebook and Google Plus largely repost newswire stories and their main research value is that they conveniently package multiple sources of information together.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Wang, Shiliang; Paul, Michael; Dredze, Mark
Exploring Health Topics in Chinese Social Media: An Analysis of Sina Weibo Proceedings Article
In: AAAI Workshop on the World Wide Web and Public Health Intelligence, 2014.
@inproceedings{Wang:2014fk,
  title     = {Exploring Health Topics in Chinese Social Media: An Analysis of Sina Weibo},
  author    = {Wang, Shiliang and Paul, Michael and Dredze, Mark},
  url       = {http://www.aaai.org/ocs/index.php/WS/AAAIW14/paper/download/8721/8222},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {AAAI Workshop on the World Wide Web and Public Health Intelligence},
  abstract  = {This paper seeks to identify and characterize health-related topics discussed on the Chinese microblogging website, Sina Weibo. We identified nearly 1 million messages containing health-related keywords, filtered from a dataset of 93 million messages spanning five years. We applied probabilistic topic models to this dataset and identified the prominent health topics. We show that a variety of health topics are discussed in Sina Weibo, and that four flu-related topics are correlated with monthly influenza case rates in China.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Coppersmith, Glen; Harman, Craig; Dredze, Mark
Measuring Post Traumatic Stress Disorder in Twitter Proceedings Article
In: International Conference on Weblogs and Social Media (ICWSM), 2014.
@inproceedings{Coppersmith:2014lr,
  title     = {Measuring Post Traumatic Stress Disorder in Twitter},
  author    = {Coppersmith, Glen and Harman, Craig and Dredze, Mark},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {International Conference on Weblogs and Social Media (ICWSM)},
  abstract  = {Traditional mental health studies rely on information primarily collected and analyzed through personal contact with a health care professional. Recent work has shown the utility of social media data for studying depression, but there have been limited evaluations of other mental health conditions. We consider post traumatic stress disorder (PTSD), a serious condition that affects millions worldwide, with especially high rates in military veterans. We show how to obtain a PTSD classifier for social media using simple searches of available Twitter data, a significant reduction in training data cost compared to previous work on mental health. We demonstrate its utility by an examination of language use from PTSD individuals, and by detecting elevated rates of PTSD at and around US military bases using our classifiers.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Beller, Charley; Harman, Craig; Van Durme, Benjamin
Predicting Fine-grained Social Roles with Selectional Preferences Proceedings Article
In: Association for Computational Linguistics (ACL), Workshop on Language Technologies and Computational Social Science (LACSS), 2014.
@inproceedings{BellerHarmanVanDurmeACL14,
  title     = {Predicting Fine-grained Social Roles with Selectional Preferences},
  author    = {Beller, Charley and Harman, Craig and Van Durme, Benjamin},
  url       = {https://www.aclweb.org/anthology/W/W14/W14-2515.pdf},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL), Workshop on Language Technologies and Computational Social Science (LACSS)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Liu, Chunxi; Jansen, Aren; Chen, Guoguo; Kintzley, Keith; Trmal, Jan; Khudanpur, Sanjeev
Low-Resource Open Vocabulary Keyword Search Using Point Process Models Proceedings Article
In: Fifteenth Annual Conference of the International Speech Communication Association, 2014.
@inproceedings{liu2014low,
  title     = {Low-Resource Open Vocabulary Keyword Search Using Point Process Models},
  author    = {Liu, Chunxi and Jansen, Aren and Chen, Guoguo and Kintzley, Keith and Trmal, Jan and Khudanpur, Sanjeev},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Fifteenth Annual Conference of the International Speech Communication Association},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Knowles, Rebecca; Dredze, Mark; Evans, Kathleen; Lasser, Elyse; Richards, Tom; Weiner, Jonathan; Kharrazi, Hadi
High Risk Pregnancy Prediction from Clinical Text Proceedings Article
In: NIPS Workshop on Machine Learning for Clinical Data Analysis, 2014.
@inproceedings{Knowles:2014ly,
  title     = {High Risk Pregnancy Prediction from Clinical Text},
  author    = {Knowles, Rebecca and Dredze, Mark and Evans, Kathleen and Lasser, Elyse and Richards, Tom and Weiner, Jonathan and Kharrazi, Hadi},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {NIPS Workshop on Machine Learning for Clinical Data Analysis},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Fine, Alex; Frank, Austin; Jaeger, T.; Van Durme, Benjamin
Biases in Predicting the Human Language Model Proceedings Article
In: Association for Computational Linguistics (ACL), Short Papers, 2014.
@inproceedings{FineFrankJaegerVanDurmeACL14,
  title     = {Biases in Predicting the Human Language Model},
  author    = {Fine, Alex and Frank, Austin and Jaeger, T. and Van Durme, Benjamin},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL), Short Papers},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Peng, Nanyun; Wang, Yiming; Dredze, Mark
Learning Polylingual Topic Models from Code-Switched Social Media Documents Proceedings Article
In: Association for Computational Linguistics (ACL), 2014.
@inproceedings{Peng:2014fk,
  title     = {Learning Polylingual Topic Models from Code-Switched Social Media Documents},
  author    = {Peng, Nanyun and Wang, Yiming and Dredze, Mark},
  url       = {http://www.aclweb.org/anthology/P/P14/P14-2110.pdf},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL)},
  abstract  = {Code-switched documents are common in social media, providing evidence for polylingual topic models to infer aligned topics across languages. We present Code-Switched LDA (csLDA), which infers language specific topic distributions based on code-switched documents to facilitate multi-lingual corpus analysis. We experiment on two code-switching corpora (English-Spanish Twitter data and English-Chinese Weibo data) and show that csLDA improves perplexity over LDA, and learns semantically coherent aligned topics as judged by human annotators.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Coppersmith, Glen; Dredze, Mark; Harman, Craig
Quantifying Mental Health Signals in Twitter Proceedings Article
In: ACL Workshop on Computational Linguistics and Clinical Psychology, 2014.
@inproceedings{Coppersmith:2014fk,
  title     = {Quantifying Mental Health Signals in Twitter},
  author    = {Coppersmith, Glen and Dredze, Mark and Harman, Craig},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {ACL Workshop on Computational Linguistics and Clinical Psychology},
  abstract  = {The ubiquity of social media provides a rich opportunity to enhance the data available to mental health clinicians and researchers, enabling a better-informed and better-equipped mental health field. We present analysis of mental health phenomena in publicly available Twitter data, demonstrating how rigorous application of simple natural language processing methods can yield insight into specific disorders as well as mental health writ large, along with evidence that as-of-yet undiscovered linguistic signals relevant to mental health exist in social media. We present a novel method for gathering data for a range of mental illnesses quickly and cheaply, then focus on analysis of four in particular: post-traumatic stress disorder (PTSD), major depressive disorder, bipolar disorder, and seasonal affective disorder. We intend for these proof-of-concept results to inform the necessary ethical discussion regarding the balance between the utility of such data and the privacy of mental health related information.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Broniatowski, David; Paul, Michael; Dredze, Mark
Twitter: Big Data Opportunities (Letter) Journal Article
In: pp. 148, 2014.
@article{Broniatowski:2014nr,
  title     = {Twitter: Big Data Opportunities (Letter)},
  author    = {Broniatowski, David and Paul, Michael and Dredze, Mark},
  url       = {http://www.sciencemag.org/content/345/6193/148.1.full},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Science},
  volume    = {345},
  number    = {6193},
  pages     = {148},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Gao, Ning; Oard, Douglas; Dredze, Mark
A Test Collection for Email Entity Linking Proceedings Article
In: NIPS Workshop on Automated Knowledge Base Construction, 2014.
@inproceedings{Gao:2014ty,
  title     = {A Test Collection for Email Entity Linking},
  author    = {Gao, Ning and Oard, Douglas and Dredze, Mark},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {NIPS Workshop on Automated Knowledge Base Construction},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Paul, Michael; Dredze, Mark; Broniatowski, David
Twitter Improves Influenza Forecasting Journal Article
In: 2014.
@article{Paul_Dredze_Broniatowski:2014,
  title     = {Twitter Improves Influenza Forecasting},
  author    = {Paul, Michael and Dredze, Mark and Broniatowski, David},
  url       = {http://currents.plos.org/outbreaks/article/twitter-improves-influenza-forecasting/},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {PLOS Currents Outbreaks},
  abstract  = {Accurate disease forecasts are imperative when preparing for influenza epidemic outbreaks; nevertheless, these forecasts are often limited by the time required to collect new, accurate data. In this paper, we show that data from the microblogging community Twitter significantly improves influenza forecasting. Most prior influenza forecast models are tested against historical influenza-like illness (ILI) data from the U.S. Centers for Disease Control and Prevention (CDC). These data are released with a one-week lag and are often initially inaccurate until the CDC revises them weeks later. Since previous studies utilize the final, revised data in evaluation, their evaluations do not properly determine the effectiveness of forecasting. Our experiments using ILI data available at the time of the forecast show that models incorporating data derived from Twitter can reduce forecasting error by 17-30\% over a baseline that only uses historical data. For a given level of accuracy, using Twitter data produces forecasts that are two to four weeks ahead of baseline models. Additionally, we find that models using Twitter data are, on average, better predictors of influenza prevalence than are models using data from Google Flu Trends, the leading web data source.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Paul, Michael; Dredze, Mark; Broniatowski, David
Challenges in Influenza Forecasting and Opportunities for Social Media Proceedings Article
In: AAAI Workshop on the World Wide Web and Public Health Intelligence, 2014.
@inproceedings{paul_dredze_aaai:14,
  title     = {Challenges in Influenza Forecasting and Opportunities for Social Media},
  author    = {Paul, Michael and Dredze, Mark and Broniatowski, David},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {AAAI Workshop on the World Wide Web and Public Health Intelligence},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Gao, Ning; Webber, William; Oard, Douglas W
Reducing Reliance on Relevance Judgments for System Comparison by Using Expectation-Maximization Proceedings Article
In: The 36th European Conference on Information Retrieval, pp. 1–12, Springer, 2014.
@inproceedings{Gao2014ECIR,
  title     = {Reducing Reliance on Relevance Judgments for System Comparison by Using Expectation-Maximization},
  author    = {Gao, Ning and Webber, William and Oard, Douglas W},
  url       = {http://terpconnect.umd.edu/~oard/pdf/ecir14.pdf},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {The 36th European Conference on Information Retrieval},
  pages     = {1--12},
  publisher = {Springer},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Aguilar, Jacqueline; Beller, Charley; McNamee, Paul; Van Durme, Benjamin; Strassel, Stephanie; Song, Zhiyi; Ellis, Joe
A Comparison of the Events and Relations Across ACE, ERE, TAC-KBP, and FrameNet Annotation Standards. Proceedings Article
In: ACL Workshop: EVENTS, 2014.
@inproceedings{Aguilar2014,
  title     = {A Comparison of the Events and Relations Across ACE, ERE, TAC-KBP, and FrameNet Annotation Standards},
  author    = {Aguilar, Jacqueline and Beller, Charley and McNamee, Paul and Van Durme, Benjamin and Strassel, Stephanie and Song, Zhiyi and Ellis, Joe},
  url       = {https://www.aclweb.org/anthology/W/W14/W14-2907.pdf},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {ACL Workshop: EVENTS},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Beller, Charley; Knowles, Rebecca; Harman, Craig; Bergsma, Shane; Mitchell, Margaret; Van Durme, Benjamin
I'm a Belieber: Social Roles via Self-identification and Conceptual Attributes Proceedings Article
In: Association for Computational Linguistics (ACL), Short Papers, 2014.
@inproceedings{BellerKnowlesHarmanBergsmaMitchellVanDurmeACL14,
  title     = {I'm a Belieber: Social Roles via Self-identification and Conceptual Attributes},
  author    = {Beller, Charley and Knowles, Rebecca and Harman, Craig and Bergsma, Shane and Mitchell, Margaret and Van Durme, Benjamin},
  url       = {http://aclweb.org/anthology/P14-2030},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL), Short Papers},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ayers, John; Althouse, Benjamin; Johnson, Morgan; Dredze, Mark; Cohen, Joanna
What's the Healthiest Day? Circaseptan (Weekly) Rhythms in Healthy Considerations Journal Article
In: 2014.
@article{Ayers:2014lr,
  title     = {What's the Healthiest Day? Circaseptan (Weekly) Rhythms in Healthy Considerations},
  author    = {Ayers, John and Althouse, Benjamin and Johnson, Morgan and Dredze, Mark and Cohen, Joanna},
  url       = {http://www.ajpmonline.org/article/S0749-3797(14)00099-3/abstract},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {American Journal of Preventive Medicine},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Rastogi, Pushpendre; Van Durme, Benjamin
Augmenting FrameNet Via PPDB Proceedings Article
In: Association for Computational Linguistics (ACL), Workshop on EVENTS, 2014.
@inproceedings{RastogiVanDurmeACL14,
  title     = {Augmenting FrameNet Via PPDB},
  author    = {Rastogi, Pushpendre and Van Durme, Benjamin},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL), Workshop on EVENTS},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Osborne, Miles; Lall, Ashwin; Van Durme, Benjamin
Exponential Reservoir Sampling for Streaming Language Models Proceedings Article
In: Association for Computational Linguistics (ACL), Short Papers, 2014.
@inproceedings{OsborneLallVanDurmeACL14,
  title     = {Exponential Reservoir Sampling for Streaming Language Models},
  author    = {Osborne, Miles and Lall, Ashwin and Van Durme, Benjamin},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL), Short Papers},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Yu, Mo; Gormley, Matt; Dredze, Mark
Factor-based Compositional Embedding Models Proceedings Article
In: NIPS Workshop on Learning Semantics, 2014.
@inproceedings{Mo-Yu:2014qv,
  title     = {Factor-based Compositional Embedding Models},
  author    = {Yu, Mo and Gormley, Matt and Dredze, Mark},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {NIPS Workshop on Learning Semantics},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lee, Joy; DeCamp, Matthew; Dredze, Mark; Chisolm, Margaret; Berger, Zackary
What Are Health-related Users Tweeting? A Qualitative Content Analysis of Health-related Users and their Messages on Twitter Journal Article
In: 2014.
@article{Lee:2014ve,
  title     = {What Are Health-related Users Tweeting? A Qualitative Content Analysis of Health-related Users and their Messages on Twitter},
  author    = {Lee, Joy and DeCamp, Matthew and Dredze, Mark and Chisolm, Margaret and Berger, Zackary},
  url       = {http://www.jmir.org/2014/10/e237},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Journal of Medical Internet Research},
  volume    = {16},
  number    = {10},
  pages     = {e237},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Paul, Michael; Dredze, Mark
Discovering Health Topics in Social Media Using Topic Models Journal Article
In: 2014.
@article{Paul:2014rt,
  title     = {Discovering Health Topics in Social Media Using Topic Models},
  author    = {Paul, Michael and Dredze, Mark},
  url       = {http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0103408},
  doi       = {10.1371/journal.pone.0103408},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {PLOS ONE},
  abstract  = {By aggregating self-reported health statuses across millions of users, we seek to characterize the variety of health information discussed in Twitter. We describe a topic modeling framework for discovering health topics in Twitter, a social media website. This is an exploratory approach with the goal of understanding what health topics are commonly discussed in social media. This paper describes in detail a statistical topic model created for this purpose, the Ailment Topic Aspect Model (ATAM), as well as our system for filtering general Twitter data based on health keywords and supervised classification. We show how ATAM and other topic models can automatically infer health topics in 144 million Twitter messages from 2011 to 2013. ATAM discovered 13 coherent clusters of Twitter messages, some of which correlate with seasonal influenza (r = 0.689) and allergies (r = 0.810) temporal surveillance data, as well as exercise (r = .534) and obesity (r = −.631) related geographic survey data in the United States. These results demonstrate that it is possible to automatically discover topics that attain statistically significant correlations with ground truth data, despite using minimal human supervision and no historical data to train the model, in contrast to prior work. Additionally, these results demonstrate that a single general-purpose model can identify many different health topics in social media.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Trmal, Jan; Chen, Guoguo; Povey, Daniel; Khudanpur, Sanjeev; Ghahremani, Pegah; Zhang, Xiaohui; Manohar, Vimal; Liu, Chunxi; Jansen, Aren; Klakow, Dietrich; Yarowsky, David; Metze, Florian
A KEYWORD SEARCH SYSTEM USING OPEN SOURCE SOFTWARE Proceedings Article
In: IEEE Workshop on Spoken Language Technology, 2014.
@inproceedings{trmalkeyword,
  title     = {A Keyword Search System Using Open Source Software},
  author    = {Trmal, Jan and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev and Ghahremani, Pegah and Zhang, Xiaohui and Manohar, Vimal and Liu, Chunxi and Jansen, Aren and Klakow, Dietrich and Yarowsky, David and Metze, Florian},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {IEEE Workshop on Spoken Language Technology},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Benton, Adrian; Deyoung, Jay; Teichert, Adam; Dredze, Mark; Van Durme, Benjamin; Mayhew, Stephen; Thomas, Max
Faster (and Better) Entity Linking with Cascades Proceedings Article
In: NIPS Workshop on Automated Knowledge Base Construction, 2014.
@inproceedings{Benton:2014qe,
  title     = {Faster (and Better) Entity Linking with Cascades},
  author    = {Benton, Adrian and Deyoung, Jay and Teichert, Adam and Dredze, Mark and Van Durme, Benjamin and Mayhew, Stephen and Thomas, Max},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {NIPS Workshop on Automated Knowledge Base Construction},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Yu, Mo; Dredze, Mark
Improving Lexical Embeddings with Semantic Knowledge Proceedings Article
In: Association for Computational Linguistics (ACL), 2014.
@inproceedings{Yu:2014,
  title     = {Improving Lexical Embeddings with Semantic Knowledge},
  author    = {Yu, Mo and Dredze, Mark},
  url       = {http://www.aclweb.org/anthology/P14-2089},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL)},
  abstract  = {Word embeddings learned on unlabeled data are a popular tool in semantics, but may not capture the desired semantics. We propose a new learning objective that incorporates both a neural language model objective and prior knowledge from semantic resources to learn improved lexical semantic embeddings. We demonstrate that our embeddings improve over those learned solely on raw text in three settings: language modeling, measuring semantic similarity, and predicting human judgements.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Kamper, Herman; Jansen, Aren; King, Simon; Goldwater, Sharon
UNSUPERVISED LEXICAL CLUSTERING OF SPEECH SEGMENTS USING FIXED-DIMENSIONAL ACOUSTIC EMBEDDINGS Proceedings Article
In: IEEE Workshop on Spoken Language Technology, 2014.
@inproceedings{kamperunsupervised,
  title     = {Unsupervised Lexical Clustering of Speech Segments Using Fixed-Dimensional Acoustic Embeddings},
  author    = {Kamper, Herman and Jansen, Aren and King, Simon and Goldwater, Sharon},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {IEEE Workshop on Spoken Language Technology},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
May, Chandler; Clemmer, Alex; Van Durme, Benjamin
Particle Filter Rejuvenation and Latent Dirichlet Allocation Proceedings Article
In: Association for Computational Linguistics (ACL), Short Papers, 2014.
@inproceedings{MayClemmerVanDurmeACL14,
  title     = {Particle Filter Rejuvenation and Latent Dirichlet Allocation},
  author    = {May, Chandler and Clemmer, Alex and Van Durme, Benjamin},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {Association for Computational Linguistics (ACL), Short Papers},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Kintzley, Keith; Jansen, Aren; Hermansky, Hynek
Featherweight Phonetic Keyword Search for Conversational Speech Proceedings Article
In: International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2014.
@inproceedings{kintzleyfeatherweight,
  title     = {Featherweight Phonetic Keyword Search for Conversational Speech},
  author    = {Kintzley, Keith and Jansen, Aren and Hermansky, Hynek},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
There are no upcoming events.