% Conference paper in the Euro-Par 2006 proceedings (Lecture Notes in Computer Science).
% Cleaned up from an automated export; the citation key is kept unchanged so existing citations still resolve.
% NOTE(review): the series volume number and the publisher location were filled in by hand -- confirm against the DOI landing page.
@inproceedings{49a0c68e2c5849448c142bafc9938099,
  author    = {Ltaief, Hatem and Garbey, Marc and Gabriel, Edgar},
  title     = {Parallel fault tolerant algorithms for parabolic problems},
  booktitle = {{Euro-Par} 2006 Parallel Processing - 12th International {Euro-Par} Conference, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {4128},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2006},
  pages     = {700--709},
  doi       = {10.1007/11823285_73},
  isbn      = {3540377832},
  language  = {English (US)},
  note      = {12th International {Euro-Par} Conference, 28 August -- 1 September 2006},
  abstract  = {With increasing number of processors available on nowadays high performance computing systems, the mean time between failure of these machines is decreasing. The ability of hardware and software components to handle process failures is therefore getting increasingly important. The objective of this paper is to present a fault tolerant approach for the implicit forward time integration of parabolic problems using explicit formulas. This technique allows the application to recover from process failures and to reconstruct the lost data of the failed process(es) avoiding the roll-back operation required in most checkpoint-restart schemes. The benchmark used to highlight the new algorithms is the two dimensional heat equation solved with a first order implicit Euler scheme.},
}