% Kraven et al. (2022), Thorax -- online-first article (URL path is under content/early/);
% no volume, issue or page fields are present, so cite via the DOI / elocation-id.
@article{Kraventhoraxjnl-2021-218563,
  author        = {Kraven, Luke M and Taylor, Adam R and Molyneaux, Philip L and
                   Maher, Toby M and McDonough, John E and Mura, Marco and
                   Yang, Ivana V and Schwartz, David A and Huang, Yong and
                   Noth, Imre and Ma, Shwu Fan and Yeo, Astrid J and
                   Fahy, William A and Jenkins, R Gisli and Wain, Louise V},
  title         = {Cluster analysis of transcriptomic datasets to identify endotypes of idiopathic pulmonary fibrosis},
  journal       = {Thorax},
  year          = {2022},
  publisher     = {BMJ Publishing Group Ltd},
  elocation-id  = {thoraxjnl-2021-218563},
  issn          = {0040-6376},
  doi           = {10.1136/thoraxjnl-2021-218563},
  url           = {https://thorax.bmj.com/content/early/2022/05/24/thoraxjnl-2021-218563},
  eprint        = {https://thorax.bmj.com/content/early/2022/05/24/thoraxjnl-2021-218563.full.pdf},
  abstract      = {Background Considerable clinical heterogeneity in idiopathic pulmonary fibrosis (IPF) suggests the existence of multiple disease endotypes. Identifying these endotypes would improve our understanding of the pathogenesis of IPF and could allow for a biomarker-driven personalised medicine approach. We aimed to identify clinically distinct groups of patients with IPF that could represent distinct disease endotypes.
                   Methods We co-normalised, pooled and clustered three publicly available blood transcriptomic datasets (total 220 IPF cases). We compared clinical traits across clusters and used gene enrichment analysis to identify biological pathways and processes that were over-represented among the genes that were differentially expressed across clusters. A gene-based classifier was developed and validated using three additional independent datasets (total 194 IPF cases).
                   Findings We identified three clusters of patients with IPF with statistically significant differences in lung function (p=0.009) and mortality (p=0.009) between groups. Gene enrichment analysis implicated mitochondrial homeostasis, apoptosis, cell cycle and innate and adaptive immunity in the pathogenesis underlying these groups. We developed and validated a 13-gene cluster classifier that predicted mortality in IPF (high-risk clusters vs low-risk cluster: HR 4.25, 95\% CI 2.14 to 8.46, p=$3.7\times10^{-5}$).
                   Interpretation We have identified blood gene expression signatures capable of discerning groups of patients with IPF with significant differences in survival. These clusters could be representative of distinct pathophysiological states, which would support the theory of multiple endotypes of IPF. Although more work must be done to confirm the existence of these endotypes, our classifier could be a useful tool in patient stratification and outcome prediction in IPF.
                   Data are available in a public, open access repository and available on reasonable request. All gene expression data used in this study are freely available on the Gene Expression Omnibus (https://www.ncbi.nlm.nih.gov/geo/). Additional clinical data for some participants were obtained directly from the study authors and are available on reasonable request.},
}