[Saga-devel] saga-projects SVN commit 838: /papers/clouds/
sjha at cct.lsu.edu
sjha at cct.lsu.edu
Mon Jan 12 13:46:56 CST 2009
User: sjha
Date: 2009/01/12 01:46 PM
Added:
/papers/clouds/
saga.bib, saga_cloud_interop.tex, saga_data_intensive.bib
Log:
copied 'saga_data_intensive.tex' to saga_cloud_interop.tex
and then cleaned up
added some minor remarks..
changed title..
getting ready for onslaught..
File Changes:
Directory: /papers/clouds/
==========================
File [added]: saga.bib
Delta lines: +157 -0
===================================================================
--- papers/clouds/saga.bib 2009-01-11 00:28:41 UTC (rev 837)
+++ papers/clouds/saga.bib 2009-01-12 19:46:36 UTC (rev 838)
@@ -0,0 +1,157 @@
+ at misc{saga-req,
+ Author = {Andre Merzky and Shantenu Jha},
+ Date-Added = {2008-02-24 20:36:03 -0600},
+ Date-Modified = {2008-02-24 20:36:03 -0600},
+ Howpublished = {Grid Forum Document GFD.71},
+ Note = {Global Grid Forum},
+ Title = {{A Requirements Analysis for a Simple API for Grid
+ Applications}},
+ Year = 2006}
+
+ at misc{saga-uc,
+ Author = {Andre Merzky and Shantenu Jha},
+ Date-Added = {2008-02-24 20:35:56 -0600},
+ Date-Modified = {2008-02-24 20:35:56 -0600},
+ Howpublished = {Grid Forum Document GFD.70},
+ Note = {Global Grid Forum},
+ Title = {{A Collection of Use Cases for a Simple API for Grid
+ Applications}},
+ Year = 2006}
+
+ at inproceedings{mapreduce,
+ Address = {Berkeley, CA, USA},
+ Author = {Jeffrey Dean and Sanjay Ghemawat},
+ Booktitle = {OSDI'04: Proceedings of the 6th conference on
+ Symposium on Operating Systems Design \&
+ Implementation},
+ Date-Added = {2008-02-23 20:47:01 -0600},
+ Date-Modified = {2008-05-12 23:43:10 +0200},
+ Location = {San Francisco, CA},
+ Pages = {137--150},
+ Publisher = {USENIX Association},
+ Title = {{MapReduce: Simplified Data Processing on Large Clusters}},
+ Year = {2004}}
+
+ at misc{allpairs, note = {All-Pairs: An Abstraction for Data Intensive Cloud Computing, Christopher Moretti, Jared Bulosan, Douglas Thain, and Patrick Flynn, IEEE International Parallel and Distributed Processing Symposium (IPDPS), April 2008.}}
+
+ at article{sagastuff,
+ Author = {Goodale, Tom and Jha, Shantenu and Kaiser, Hartmut
+ and Kielmann, Thilo and Kleijer, Pascal and von Laszewski,
+ Gregor and Lee, Craig and Merzky, Andre and Rajic, Hrabri and
+ Shalf, John}, Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100}, Journal =
+ {Computational Methods in Science and Technology}, Number =
+ {1}, Pages = {7--20}, Title = {{SAGA: A Simple API for Grid
+ Applications, High-Level Application Programming on the
+ Grid}}, Url =
+ {http://saga.cct.lsu.edu/publications/saga_paper-a_simple_api_for_grid_applications_sc05.pdf},
+ Volume = {12}, Year = {2006},
+ Bdsk-Url-1 =
+ {http://saga.cct.lsu.edu/publications/saga_paper-a_simple_api_for_grid_applications_sc05.pdf}}
+
+ at misc{saga_gfd90,
+author={Tom Goodale and Shantenu Jha and Hartmut Kaiser and Thilo Kielmann and Pascal Kleijer and Andre Merzky and John Shalf and Christopher Smith},
+title="{A Simple API for Grid Applications (SAGA)}",
+howpublished = {OGF Document Series 90,
+http://www.ogf.org/documents/GFD.90.pdf}}
+
+ at inproceedings{saga_gin,
+ Address = {Washington, DC, USA},
+ Author = {Shantenu Jha and Hartmut Kaiser and Andre Merzky and Ole Weidner},
+ Booktitle = {E-SCIENCE '07: Proceedings of the Third IEEE International Conference on e-Science and Grid Computing (e-Science 2007)},
+ Date-Added = {2008-02-16 18:54:59 -0600},
+ Date-Modified = {2008-02-16 18:54:59 -0600},
+ Doi = {http://dx.doi.org/10.1109/E-SCIENCE.2007.39},
+ Isbn = {0-7695-3064-8},
+ Pages = {584--591},
+ Publisher = {IEEE Computer Society},
+ Title = {Grid Interoperability at the Application Level Using SAGA},
+ Year = {2007},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/E-SCIENCE.2007.39}}
+
+ at inproceedings{saga_escience07,
+ Author = {Jha, Shantenu and Kaiser, Hartmut and El Khamra, Yaakoub and Weidner, Ole},
+ Booktitle = {Accepted for 3rd IEEE Conference on eScience2007
+ and Grid Computing, Bangalore, India.},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Title = {Design and Implementation of Network Performance
+ Aware Applications Using SAGA and Cactus},
+ Url = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf}}
+
+ at misc{saga_tg08, note = {Developing Large-Scale Adaptive Scientific
+Applications with Hard to Predict Runtime Resource Requirements, {\it
+Proceedings of TeraGrid08}, available at http://tinyurl.com/5du32j}}
+
+ at misc{repex_ptrsa, note = {Adaptive Distributed Replica Exchange, {\it
+Accepted for Phil. Transactions of the Royal Society London A}}}
+
+
+ at misc{saga_mapreduce, note={Exposing the Power of Google through SAGA, {\it Google Summer of Code} http://www.omii.ac.uk/wiki/MPGoogleSAGA}}
+
+
+ at misc{dpa-paper, note = {S. Jha et al., {\em Programming Abstractions
+ for Large-scale Distributed Applications}, to be
+ submitted to ACM Computing Surveys; draft at
+ \url{http://www.cct.lsu.edu/~sjha/publications/dpa_surveypaper.pdf}}}
+
+ at misc{remd-manager_url, note={https://svn.cct.lsu.edu/repos/saga-projects/applications/REMDgManager/src/main.py}}
+
+ at misc{saga_url, url={{http://saga.cct.lsu.edu}}}
+
+ at misc{glite, note={{http://glite.web.cern.ch/glite/}}}
+
+ at inproceedings{escience07,
+ author = {Shantenu Jha and Hartmut Kaiser and Yaakoub El Khamra and Ole Weidner},
+ title = {{Design and Implementation of Network Performance Aware Applications Using SAGA and Cactus}},
+ booktitle = {E-SCIENCE '07: Proceedings of the Third IEEE International Conference on e-Science and Grid Computing},
+ year = {2007},
+ isbn = {0-7695-3064-8},
+ pages = {143--150},
+ doi = {http://dx.doi.org/10.1109/E-SCIENCE.2007.28},
+ OPTpublisher = {IEEE Computer Society},
+ OPTaddress = {Washington, DC, USA},
+ }
+
+ at misc{ogf_web,
+ author = {{Open Grid Forum}},
+ url = {http://www.ogf.org/}
+}
+
+ at misc{DRMAA_url,
+ author = {{Distributed Resource Management Application API}},
+ note = {{http://drmaa.org}}
+}
+
+ at misc{gridrpc_url, note = {{http://forge.ogf.org/sf/projects/gridrpc-wg}} }
+
+ at misc{saga_core_long,
+ author = {{Tom Goodale, Shantenu Jha, Hartmut Kaiser, Thilo Kielmann, Pascal Kleijer, Andre Merzky, John Shalf, Christopher Smith}},
+ url = {http://www.ogf.org/documents/GFD.90.pdf}
+}
+
+ at misc{saga-core,
+ author = {{T Goodale and {\it et al} }},
+ url = {http://www.ogf.org/documents/GFD.90.pdf}
+}
+
+ at misc{novelsubmissionmode,
+ author = {{P. Chakraborty and S. Jha and D. Katz}},
+ title = {{Novel Submission Modes of Tightly-coupled Jobs Across
+ Distributed Resources}},
+ note = {Accepted for Publication in Phil. Trans. of the Royal Society A}
+}
+
+ at misc{saga_condor_url, note = {{http://fortytwo.cct.lsu.edu:8000/SAGA/wiki/CondorAdaptor}}}
+
+ at misc{saga_condor,
+ author = {{Shantenu Jha}},
+ OPTtitle = {{Being Optimally Lazy: The case for integrating SAGA
+ with Condor, Talk given at Condor Week 2008}},
+ OPTurl = {http://www.cs.wisc.edu/condor/CondorWeek2008/condor\_presentations/jha\_saga.pdf},
+ OPTnote = {\url{http://www.cs.wisc.edu/condor/CondorWeek2008/condor_presentations/jha_saga.pdf}},
+}
+
+ at misc{buyya_hpcc, note = {Rajkumar Buyya et al., {\it Market-Oriented Cloud Computing: Vision, Hype, and Reality for Delivering IT Services as Computing Utilities}, Keynote Paper, Proceedings of the 10th IEEE International Conference on High Performance Computing and Communications (HPCC 2008, IEEE CS Press, Los Alamitos, CA, USA), Sept. 25-27, 2008, Dalian, China}, url = {http://www.gridbus.org/papers/hpcc2008_keynote_cloudcomputing.pdf}}
\ No newline at end of file
File [added]: saga_cloud_interop.tex
Delta lines: +920 -0
===================================================================
--- papers/clouds/saga_cloud_interop.tex 2009-01-11 00:28:41 UTC (rev 837)
+++ papers/clouds/saga_cloud_interop.tex 2009-01-12 19:46:36 UTC (rev 838)
@@ -0,0 +1,920 @@
+\documentclass[conference,final]{IEEEtran}
+
+\usepackage[utf8]{inputenc}
+\usepackage{graphicx}
+\usepackage{url}
+\usepackage{float}
+\usepackage{times}
+\usepackage{listings}
+\usepackage{times}
+\usepackage{paralist}
+\usepackage{wrapfig}
+\usepackage[small,it]{caption}
+\usepackage{multirow}
+\usepackage{ifpdf}
+\usepackage{subfig}
+\usepackage{color}
+\usepackage{natbib}
+\usepackage{pdfsync}
+\usepackage{fancyvrb}
+\usepackage{wrapfig}
+\usepackage{multirow}
+%\usepackage{multicolumn}
+
+\newenvironment{shortlist}{
+ \vspace*{-0.85em}
+ \begin{itemize}
+ \setlength{\itemsep}{-0.3em}
+}{
+ \end{itemize}
+ \vspace*{-0.6em}
+}
+
+\DefineShortVerb{\|}
+\DefineVerbatimEnvironment{mycode}{Verbatim}
+{
+ label=Code Example,
+ fontsize=\scriptsize,
+ frame=single,
+% framerule=1pt,
+ framesep=0.25em,
+ numbers=right, %numbers=right,
+ numbersep=0.5pt,
+ gobble=0,
+ numberblanklines=false
+}
+
+% \title{pplication-level Interoperability between Clouds and Grids}
+% \title{SAGA-MapReduce: Providing Infrastructure Independence and
+% Cloud-Grid Interoperability}
+\title{Application Level Interoperability between Clouds and Grids}
+\author{Andre Merzky$^{1}$, Kate Stamou, Shantenu Jha$^{123} ......$\\
+ \small{\emph{$^{1}$Center for Computation \& Technology, Louisiana
+ State University, USA}}\\
+ \small{\emph{$^{2}$Department of Computer Science, Louisiana State
+ University, USA}}\\
+ \small{\emph{$^{3}$e-Science Institute, Edinburgh, UK}}\\
+}
+
+\newif\ifdraft
+%\drafttrue
+\ifdraft
+\newcommand{\amnote}[1]{ {\textcolor{magenta} { ***AM: #1 }}}
+\newcommand{\jhanote}[1]{ {\textcolor{red} { ***SJ: #1 }}}
+\newcommand{\michaelnote}[1]{ {\textcolor{blue} { ***MM: #1 }}}
+\else
+\newcommand{\amnote}[1]{}
+\newcommand{\jhanote}[1]{}
+\newcommand{\michaelnote}[1]{ {\textcolor{blue} { ***MM: #1 }}}
+\fi
+
+\newcommand{\sagamapreduce }{SAGA-MapReduce }
+\newcommand{\tc }{ $T_c$ }
+
+\newcommand{\upup}{\vspace*{-0.5em}}
+\newcommand{\upp}{\vspace*{-0.5em}}
+\newcommand{\up}{\vspace*{-0.25em}}
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+ SAGA is a high-level programming interface which provides the
+ ability to create distributed applications in an infrastructure
+ independent way. In this paper, we show how MapReduce has been
+ implemented using SAGA and demonstrate its interoperability across
+ Clouds and Grids. We discuss how a range of {\it cloud adapters}
+ have been developed for SAGA. We discuss the advantages of
+ programmatically developing MapReduce using SAGA, by demonstrating
+ that the SAGA-based implementation is infrastructure independent
+ whilst still providing control over the deployment, distribution and
+ run-time decomposition. .... The ability to control the
+ distribution and placement of the computation units (workers) is
+ critical in order to implement the ability to move computational
+ work to the data. This is required to keep data network transfer low
+ and in the case of commercial Clouds the monetary cost of computing
+ the solution low... Using data-sets of size up to 10GB, and up to
+ 10 workers, we provide detailed performance analysis of the
+ SAGA-MapReduce implementation, and show how controlling the
+ distribution of computation and the payload per worker helps enhance
+ performance.
+\end{abstract}
+
+\section{Introduction}
+
+The case for effective programming abstractions and patterns is not
+new in computer science. Coupled with the heterogeneity and evolution
+of large-scale distributed systems, the fundamentally distributed
+nature of data and its exponential increase -- collection, storing,
+processing of data, it can be argued that there is a greater premium
+than ever before on abstractions at multiple levels.
+
+Although Clouds are a nascent infrastructure, with the
+force-of-industry behind their development and uptake (and not just
+the hype), their impact can not be ignored. Specifically, with the
+emergence of Clouds as important distributed computing infrastructure,
+we need abstractions that can support existing and emerging
+programming models for Clouds. Inevitably, the unified concept of a
+Cloud is evolving into different flavours and implementations on the
+ground. For example, there are already multiple implementations of
+Google's Bigtable, such as Hypertable, Cassandra and HBase. There is
+bound to be a continued proliferation of such Cloud-like
+infrastructure; this is reminiscent of the plethora of grid middleware
+distributions. Thus application-level support and inter-operability
+with different Cloud infrastructure is critical. And issues of scale
+aside, the transition of existing distributed programming models and
+styles, must be as seamless and as least disruptive as possible, else
+it risks engendering technical and political horror stories
+reminiscent of Globus, which became a disastrous by-word for
+everything wrong with the complexity of Grids.
+
+{\it Application-level} programming and data-access patterns remain
+essentially invariant on different infrastructure. Thus the ability to
+support application specific data-access patterns is both useful and
+important~\cite{dpa-paper}. There are however, infrastructure
+specific features -- technical and policy, that need to be
+addressed. For example, Amazon, the archetypal Cloud System has a
+well-defined cost model for data transfer across {\it its}
+network. Hence, Programming Models for Clouds must be cognizant of the
+requirement to programmatically control the placement of compute and
+data relative to each other -- both statically and even dynamically.
+It is not that traditional Grids applications do not have this
+interesting requirement, but that, such explicit support is typically
+required for very large-scale and high-performing applications. In
+contrast, for most Cloud applications such control is required in
+order to ensure basic cost minimization, i.e., the same computational
+task can be priced very differently for possibly the same performance.
+These factors and trends place a critical importance on effective
+programming abstractions for data-intensive applications for both
+Clouds and Grids and importantly in bridging the gap between the two.
+Any {\it effective} abstraction will be cognizant and provide at least
+the above features, viz., relative compute-data placement,
+application-level patterns and interoperability.
+
+The primary aim of this work is to establish that SAGA -- the Simple
+API for Grid Applications, is an {\it effective} abstraction that can
+support different programming models and is usable on traditional
+(Grids) and emerging (Clouds) distributed infrastructure. Our
+approach is to begin with a well understood data-parallel programming
+pattern (MapReduce) and implement it using SAGA -- a standard programming
+interface. SAGA has
+been demonstrated to support distributed HPC programming models and
+applications effectively; it is an important aim of this work to
+verify if SAGA has the expressiveness to implement data-parallel
+programming and is capable of supporting acceptable levels of
+performance (as compared with native implementations of
+MapReduce). After this conceptual validation, our aim is to use the
+{\it same} implementation of \sagamapreduce on Cloud systems,
+and test for inter-operability between different flavours of Clouds as
+well as between Clouds and Grids.
+
+\section{SAGA}
+SAGA~\cite{saga-core} is a high level API that provides a simple,
+standard and uniform interface for the most commonly required
+distributed functionality. SAGA can be used to encode distributed
+applications~\cite{saga_escience07_short, saga_tg08}, tool-kits to
+manage distributed applications as well as implement abstractions that
+support commonly occurring programming, access and usage patterns.
+
+\begin{figure}[t]
+\vspace{-2em}
+%\includegraphics[scale=0.5]{saga-figure02.pdf}
+\caption{In addition to the programmer's interface,
+ the other important components of the landscape are the SAGA engine,
+ and functional adaptors.} \vspace{-2em}
+\label{saga_figure}
+\end{figure}
+
+Fig.~\ref{saga_figure} provides a view of the SAGA landscape, and the
+main functional areas that SAGA provides a standardized interface
+to. Based upon an analysis of more than twenty applications, the most
+commonly required functionality involves job submission across
+different distributed platforms, support for file access and transfer,
+as well as logical file support. Less common, but equally critical,
+wherever they were required, is the support for Checkpoint and
+Recovery (CPR) and Service Discovery (SD). The API is written in C++
+with Python, C and Java language support. The {\it engine} is the main
+library, which provides dynamic support for run-time environment
+decision making through loading relevant adaptors. We will not discuss
+details of SAGA here; details can be found elsewhere~\cite{saga_url}.
+
+\jhanote{Include only if there is space: Some of the programming
+ models that are common to both data-intensive application and
+ Cloud-based computing, where there is an explicit cost-model for
+ data-movement, is to develop general heuristics on how we handle
+ common considerations such as when to move the data to the machine
+ or when to process it locally.}
+
+\subsection{Maybe a subsection or a paragraph on the role of Adaptors}
+
+Forward reference the section on the role of adaptors..
+
+
+\section{Clouds: An Emerging Distributed Infrastructure}
+
+In our opinion the primary distinguishing feature of Grids and
+Clouds is...
+
+
+\subsection{Amazon EC2:}
+
+\subsection{Eucalyptus}
+
+\subsection{Nimbus}
+
+
+
+\section{Patterns for Data-Intensive Computing: MapReduce and
+ All-Pairs}
+
+In this paper we will demonstrate the use of SAGA in implementing well
+known programming patterns for data intensive computing.
+Specifically, we have implemented MapReduce and the
+All-Pairs~\cite{allpairs_short} patterns, and have used their
+implementations in SAGA to solve commonly encountered genomic
+tasks. We have also developed real scientific applications using SAGA
+based implementations of these patterns: multiple sequence alignment
+can be orchestrated using the SAGA-All-pairs implementation, and
+genome searching can be implemented using SAGA-MapReduce.
+
+\jhanote{Only if space permits: We will discuss other performance
+ issues that arise when implementing abstractions specific for
+ data-intensive computing. A grid application's design should not
+ focus on the bandwidth of the network, the dispatch latency, the
+ number of machines available, and data reliability. Even something
+ as simple as process size can be a tough challenge to optimize. If
+ a job is too small, then network traffic becomes a bottleneck and
+ the design is inefficient. If a job is too large, it is difficult
+ to tell when it is hanging or still computing. Also, if another job
+ with a higher priority takes a machine over, the application will be
+ waiting on jobs longer. The main point of this paper is to show how
+ a flexible, extensible implementation of programming data-intensive
+ abstractions using SAGA can shield the application developer from
+ many of these considerations, while still providing the
+ sophisticated end-user the ability to control these performance and
+ cost critical/determining factors.}
+
+{\bf MapReduce: } MapReduce~\cite{mapreduce-paper} is a programming
+framework which supports applications which operate on very large data
+sets on clusters of computers. MapReduce relies on a number of
+capabilities of the underlying system, most related to file
+operations. Others are related to process/data
+allocation. % The Google File-System, and other
+% distributed file-systems (DFS), provide the relevant capabilities,
+% such as atomic file renames. Implementations of MapReduce on these
+% DFS are free to focus on implementing the data-flow pipeline, which is
+% the algorithmic core of the MapReduce framework.
+One feature worth noting in MapReduce is that the ultimate dataset is
+not on one machine, it is partitioned on multiple machines distributed
+over a Grid. Google uses their distributed file system (Google File
+System) to keep track of where each file is located. Additionally,
+they coordinate this effort with Bigtable.
+
+{\bf SAGA-MapReduce Implementation:} We have recently implemented
+MapReduce in SAGA, where the system capabilities required by MapReduce
+are usually not natively supported. Our implementation interleaves the
+core logic with explicit instructions on where processes are to be
+scheduled. The advantage of this approach is that our implementation
+is no longer bound to run on a system providing the appropriate
+semantics originally required by MapReduce, and is portable to a
+broader range of generic systems as well. The drawback is that our
+implementation is relatively more complex -- it needs to add system
+semantic capabilities at some level, and it is inherently slower -- as
+it is difficult to reproduce system-specific optimizations to work
+generically.
+% it is for these capabilities very difficult or near impossible to
+% obtain system level performance on application level.
+Critically however, none of these complexities are transferred to the
+end-user, and they remain hidden within the framework. Also many of
+these are due to the early-stages of SAGA and incomplete
+implementation of features, and not a fundamental limitation of the
+design or concept of the interface or programming models that it
+supports.
+
+The overall architecture of the SAGA-MapReduce implementation is shown
+in Fig.~\ref{saga-mapreduce_controlflow}. This simple interface
+provides the complete functionality needed by any MapReduce algorithm,
+while hiding the more complex functionality, such as chunking of the
+input, sorting of the intermediate results, launching and coordinating
+the map and reduce workers, etc. as implemented by the framework. The
+application consists of two independent processes, a master and worker
+processes. The master process is responsible for:
+
+\begin{figure}[t]
+\upp
+\centering
+% \includegraphics[width=0.4\textwidth]{saga-mapreduce_controlflow.png}
+\caption{High-level control flow diagram for SAGA-MapReduce. SAGA uses
+ a master-worker paradigm to implement the MapReduce pattern. The
+ diagram shows that there are several different infrastructure
+ options to a SAGA based
+ application; % in particular for MapReduce there
+ \jhanote{I think there should be something between the Map(1) and
+ the Reduce(2) phases.. something that comes back to the Master,
+ non?}} \vspace{-2em}
+ \label{saga-mapreduce_controlflow}
+\end{figure}
+
+\begin{itemize}
+\item Launching all workers for the map and reduce steps as described
+ in a configuration file provided by the user
+\item Coordinating the executed workers, including the chunking of the
+ data, assigning the input data to the workers of the map step,
+ handling the intermediate data files produced by the map step and
+ passing the names of the sorted output files to the workers of the
+ reduce step, and collecting the generated outputs from the reduce
+ steps and relaunching single worker instances in case of failures,
+\end{itemize}
+
+The master process is readily available to the user and needs no
+modification for different Map and Reduce functions to execute. The
+worker processes get assigned work either from the map or the reduce
+step. The functionality for the different steps has to be provided by
+the user, which means the user has to write 2 C++ functions
+implementing the required MapReduce algorithm.
+Fig.~\ref{src:saga-mapreduce} shows a very simple example of a
+MapReduce application to count the word frequencies in the
+input data set. The user provided functions |map| (line 14) and
+|reduce| (line 25) are invoked by the MapReduce framework during the
+map and reduce steps. The framework provides the URL of the input data
+chunk file to the |map| function, which should call the function
+|emitIntermediate| for each of the generated output key/value pairs
+(here the word and its count, i.e. '1', line 19). During the
+reduce step, after the data has been sorted, this output data is
+passed to the |reduce| function. The framework passes the key and a
+list of all data items which have been associated with this key during
+the map step. The reduce step calls the |emit| function
+(line 34) for each of the final output elements (here: the word
+and its overall count). All key/value pairs that are passed to |emit|
+will be combined by the framework into a single output file.
+
+% \begin{figure}[!ht]
+% \begin{center}
+% \begin{mycode}[label=SAGA MapReduce Word Count Algorithm]
+% // Counting words using SAGA-MapReduce
+% using namespace std;
+% using namespace boost;
+
+% class CountWords
+% : public MapReduceBase<CountWords> {
+% public:
+% CountWords(int argc, char *argv[])
+% : MapReduceBase<CountWords>(argc, argv)
+% {}
+
+% // Separate input into words
+% // Input: url of input chunk (chk)
+% // Output: separated words and associated
+% // data (here: '1')
+% void map(saga::url chk) {
+% using namespace boost::iostreams;
+% stream<saga_file_device> in(chk.str());
+% string elem;
+% while(in >> elem)
+% emitIntermediate(elem, "1");
+% }
+
+% // Count words
+% // Input: word to count (key)
+% // list of associated data items
+% // Output: words and their count
+% void reduce(string const& key,
+% vector<string> const& values) {
+% typedef vector<string>::iterator iter;
+
+% int result = 0;
+% iter end = values.end();
+% for (iter it = values.begin();
+% it != end; ++it) {
+% result += lexical_cast<int>(*it);
+% }
+% emit(key, lexical_cast<string>(result));
+% }
+% };
+% \end{mycode}
+% \caption{\label{src:saga-mapreduce} Counting word frequencies using
+% SAGA-MapReduce. This is the worker-side code.}
+% \end{center}
+% \end{figure}
+
+As shown in Fig.~\ref{saga-mapreduce_controlflow}, both the master and
+the worker processes use the SAGA-API as an abstract interface to the
+used infrastructure, making the application portable between different
+architectures and systems. The worker processes are launched using the
+SAGA job package, which allows the jobs to be launched either locally, using
+Globus/GRAM, Amazon Web Services, or on a Condor pool. The
+communication between the master and the worker processes is ensured
+by using the SAGA advert package, abstracting an information database
+in a platform independent way (this can also be achieved through
+SAGA-Bigtable adaptors). The Master process creates partitions of
+data (referred to as chunking, analogous to Google's MapReduce), so
+the data-set does not have to be on one machine and can be
+distributed; this is an important mechanism to avoid limitations in
+network bandwidth and data distribution. These files could then be
+recognized by a distributed File-System (FS) such as Hadoop-FS
+(HDFS). All file transfer operations are based on the SAGA file
+package, which supports a range of different FS and transfer
+protocols, such as local-FS, Globus/GridFTP, KFS, and HDFS.
+
+{\bf All-Pairs: } As the name suggests, All-Pairs involves comparing
+every element in a set to every element in another set. Such a
+pattern is pervasive and finds utility in many domains -- including
+testing the validity of an algorithm, or finding an anomaly in a
+configuration. For example, the accepted method for testing the
+strength of a facial recognition algorithm is to use All-Pairs
+testing. This creates a similarity matrix, and because it is known
+which images are the same person, the matrix can show the accuracy of
+the algorithm.
+
+% {\bf SAGA All-Pairs Implementation: } SAGA All-pairs implementation
+% is very similar to \sagamapreduce implementation. The main
+% difference is in the way jobs are run and how the data are stored.
+% In \sagamapreduce the final data is stored on many machines -- if
+% there is a DFS available, whereas SAGA All-pairs uses the database
+% to also store information about the job. We decided to do this
+% because all data must be available to be useful. We demonstrate the
+% SAGA All-Pairs abstraction using the HDFS and GridFTP to not only
+% show that SAGA allows for many different configurations, but also to
+% see how these different configurations behave. We have also used a
+% distributed data-store -- specifically HBase (Yahoo's implementation
+% of Bigtable) in lieu of the traditional Advert Service to store the
+% end-results.
+
+% {\it Multiple Sequence Alignment Using All-Pairs:} % All-Pairs is
+% An important problem in Bioinformatics -- Multiple Sequence Alignment
+% (MSA), can be reformulated to use All-Pairs pattern. It uses a
+% comparison matrix as a reference to compare many fragment genes to
+% many base genes. Each fragment is compared to every base gene to find
+% the smallest distance -- maximum overlap. Distance is computed by
+% summing up the amount of similarity between each nucleotide of the
+% fragment to each one in the base. This is done starting at every
+% point possible on the base.
+
+\section{Interfacing SAGA to Cloud-like Infrastructure: The role of
+ Adaptors}
+
+As alluded to, there is a proliferation of Clouds and Cloud-like
+systems, but it is important to remember that ``what constitutes or
+does not constitute a Cloud'' is not universally agreed upon. However
+there are several aspects and attributes of Cloud systems that are
+generally agreed upon~\cite{buyya_hpcc}. Here we will by necessity
+limit our discussion to two types of distributed file-systems (HDFS and
+KFS) and two types of distributed structured-data store (Bigtable and
+HBase). We have developed SAGA adaptors for these, and have used
+\sagamapreduce (and All-Pairs) seamlessly on these infrastructures.
+
+{\it HDFS and KFS: } HDFS is a distributed parallel fault tolerant
+application that handles the details of spreading data across multiple
+machines in a traditional hierarchical file organization. Implemented
+in Java, HDFS is designed to run on commodity hardware while providing
+scalability and optimizations for large files. The FS works by having
+one or two namenodes (masters) and many rack-aware datanodes (slaves).
+All data requests go through the namenode that uses block operations
+on each data node to properly assemble the data for the requesting
+application. The goal of replication and rack-awareness is to improve
+reliability and data retrieval time based on locality. In data
+intensive applications, these qualities are essential. KFS (also
+called CloudStore) is an open-source high-performance distributed FS
+implemented in C++, with many of the same design features as HDFS.
+
+% Another advantage is the ability to be accessed through MapReduce.
+% Since MapReduce is inherently very well parallelized, accessing
+% Bigtable is very efficient.
+% Also, since the data are partitioned
+% accessing it does not create a large strain on the network bandwidth.
+
+{\it Bigtable and HBase:} Bigtable~\cite{bigtable_small} is a type of
+database system created by Google to have better control over
+scalability and performance than other databases. The main difference
+is that it is meant to store extremely large datasets, into the
+petabytes, over thousands of machines. It is well integrated with
+MapReduce. Due to the success of Bigtable, HBase was developed as an
+open source alternative to Bigtable for use with Hadoop. Both HBase
+and Bigtable split up large tables and replicate them over many
+machines to avoid node failure.
+
+There exist many other
+implementations of both distributed FS (such as Sector) and of
+distributed data-store (such as Cassandra and Hypertable); for the
+most part they are variants on the same theme technically, but with
+different language and performance criteria optimizations. Hypertable
+is an open-source implementation of Bigtable; Cassandra is a Bigtable
+clone but eschews an explicit coordinator (Bigtable's Chubby, HBase's
+HMaster, Hypertable's Hyperspace) for a P2P/DHT approach for data
+distribution and location and for availability. In the near future we
+will be providing adaptors for
+Sector\footnote{http://sector.sourceforge.net/} and
+Cassandra\footnote{http://code.google.com/p/the-cassandra-project/}.
+And although Fig.~\ref{saga_figure} explicitly maps out different
+functional areas for which SAGA adaptors exist, there can be multiple
+adaptors (for different systems) that implement that functionality; the
+SAGA run-time dynamically loads the correct adaptor, thus providing
+both an effective abstraction layer as well as an interesting means of
+providing interoperability between different Cloud-like
+infrastructure. As testimony to the power of SAGA, the ability to
+create the relevant adaptors in a lightweight fashion and thus extend
+applications to different systems with minimal overhead is an
+important design feature and a significant requirement so as to be an
+effective programming abstraction layer.
+
+\section{SAGA: An interface to Clouds and Grids}
+
+
+The total time to completion ($T_c$) of a \sagamapreduce job, can be
+decomposed into three primary components: $t_{pp}$ defined as the time
+for pre-processing -- which in this case is the time to chunk into
+fixed size data units, and to possibly distribute them. This is in
+some ways the overhead of the process. $t_{comp}$ is the time to
+actually compute the map and reduce function on a given worker, whilst
+$t_{coord}$ is the time taken to assign the payload to a worker,
+update records and to possibly move workers to a destination
+resource. $t_{coord}$ is indicative of the time that it takes to
+assign chunks to workers and scales as the number of workers
+increases. In general:
+
+\vspace{-1em}
+\begin{eqnarray}
+T_c = t_{pp} + t_{comp} + t_{coord}
+\end{eqnarray}
+
+To establish the effectiveness of SAGA as a mechanism to develop
+distributed applications, and the ability of \sagamapreduce to
+provide flexibility in distributing compute units, we have designed
+the following experiment set\footnote{We have also distinguished
+ between SAGA All-Pairs using Advert Service versus using HBase or
+ Bigtable as distributed data-store, but due to space constraints we
+ will report results of the All-Pairs experiments elsewhere.} :
+\begin{enumerate}
+\item Both \sagamapreduce workers
+ (compute) and data-distribution are local. The number of workers varies
+ from 1 to 10, and the data-set sizes vary from 1 to 10GB. % Here we
+% will also compare \sagamapreduce with native MapReduce (using HDFS
+% and Hadoop)
+\item \sagamapreduce workers compute local (to master), but using a
+ distributed FS (HDFS)
+% upto 3 workers (upto a data-set size of 10GB).
+\item Same as Exp. \#2, but using a different distributed FS
+ (KFS); the number of workers varies from 1-10
+\item \sagamapreduce using distributed compute (workers) and distributed file-system (KFS)
+\item Distributed compute (workers) but using local file-systems (using GridFTP for transfer)
+\item {\bf NEEDS MODIFICATION}
+\end{enumerate}
+
+
+{\bf SAGA-MapReduce on Grids:} We begin with the observation that the
+efficiency of \sagamapreduce is pretty close to 1, actually better
+than 1 -- like any good (data) parallel application should be. For
+a 1GB data-set, \tc = 659s and for 10GB \tc = 6286s. The efficiency
+remains at or around 1, even when the compute is distributed over two
+machines: 1 worker at each site: \tc = 672s, \tc = 1081s and \tc
+=2051s for 1, 2 and 4GB respectively; this trend is valid even when
+the number of workers per site is more than 1.
+
+Fig.~\ref{grids1} plots the \tc for different numbers of active workers
+on different data-set sizes; the plots can be understood using the
+framework provided by Equation 1. For each data-set (from 1GB to 10GB)
+there is an overhead associated with chunking the data into 64MB
+pieces; the time required for this scales with the number of chunks
+created. Thus for a fixed chunk-size (as is the case with our
+set-up), $t_{pp}$ scales with the data-set size. As the number of
+workers increases, the payload per worker decreases and this
+contributes to a decrease in time taken, but this is accompanied by a
+concomitant increase in $t_{coord}$. However, we will establish that
+the increase in $t_{coord}$ is less than the decrease in
+$t_{comp}$. Thus the curved decrease in \tc can be explained by a
+speedup due to lower payload as the number of workers increases whilst
+at the same time the $t_{coord}$ increases; although the former is
+linear, due to increasing value of the latter, the effect is a
+curve. The plateau value is dominated by $t_{pp}$ -- the overhead of
+chunking etc, and so increasing the number of workers beyond a point
+does not lead to a further reduction in \tc.
+
+To take a real example, we consider two data-sets, of sizes 1GB and
+5GB and vary the chunk size, from 32MB to the maximum size
+possible, i.e., chunk sizes of 1GB and 5GB respectively. In the
+configuration where there is only one chunk, $t_{pp}$ should be
+effectively zero (more likely a constant), and \tc will be dominated
+by the other two components -- $t_{comp}$ and $t_{coord}$.
+For 1GB and 5GB, the ratio of \tc for this boundary case
+is very close to 1:5, providing strong evidence that the $t_{comp}$
+has the bulk contribution, as we expect $t_{coord}$ to remain mostly
+the same, as it scales either with the number of chunks and/or with
+the number of workers -- which is the same in this case. Even if
+$t_{coord}$ does change, we do not expect it to scale by a factor of
+5, while we do expect $t_{comp}$ to do so.
+
+\begin{figure}[t]
+% \includegraphics[width=0.4\textwidth]{MapReduce_local_executiontime.png}
+ \caption{Plots showing how the \tc for different data-set sizes
+ varies with the number of workers employed. For example, with
+ larger data-set sizes although $t_{pp}$ increases, as the number
+ of workers increases the workload per worker decreases, thus
+ leading to an overall reduction in $T_c$. The advantages of a
+ greater number of workers are manifest for larger data-sets.}
+\label{grids1}
+\upp
+\upp
+\upp
+\upp
+\upp
+\upp
+\upp
+\upp
+\end{figure}
+
+
+{\bf SAGA-MapReduce on Cloud-like infrastructure: } Accounting for the
+fact that time for chunking is not included, Yahoo's MapReduce takes a
+factor of 2 less time than \sagamapreduce
+(Fig.~\ref{mapreduce_timing_FS}). This is not surprising, as
+\sagamapreduce implementations have not been optimized, e.g.,
+\sagamapreduce is not multi-threaded.
+\begin{figure}[t]
+\upp
+ \centering
+% \includegraphics[width=0.40\textwidth]{mapreduce_timing_FS.pdf}
+ \caption{\tc for \sagamapreduce using one worker (local to
+ the master) for different configurations. The label
+ ``Hadoop'' represents Yahoo's MapReduce implementation;
+ \tc for Hadoop is without chunking, which takes
+ several hundred sec for larger data-sets. The ``SAGA
+ MapReduce + Local FS'' corresponds to the use of the local
+ FS on Linux clusters, while the label ``SAGA + HDFS''
+ corresponds to the use of HDFS on the clusters. Due to
+ the simplicity of the local FS, its performance beats that of the
+ distributed FS when used in local mode.}
+ % It is interesting to note that as the data-set sizes get
+ % larger, HDFS starts outperforming local FS. We attribute
+ % this to the use of caching and other advanced features in
+ % HDFS which prove to be useful, even though it is not being
+ % used in a distributed fashion. scenarios considered are
+ % (i) all infrastructure is local and thus SAGA's local
+ % adapters are invoked, (ii) local job adaptors are used,
+ % but the hadoop file-system (HDFS) is used, (iii) Yahoo's
+ % mapreduce.
+% \label{saga_mapreduce_1worker.png}
+ \label{mapreduce_timing_FS}
+\upp
+\end{figure}
+Experiment 5 (Table~\ref{exp4and5}) provides insight into performance
+figures when the same number of workers are available, but are either
+all localized, or are split evenly between two similar but distributed
+machines. It shows that to get lowest $T_c$, it is often required to
+both distribute the compute and lower the workload per worker; just
+lowering the workload per worker is not good enough as there is still
+a point of serialization (usually local I/O). % It shows that when
+% workload per worker gets to a certain point, it is beneficial to
+% distribute the workers, as the machine I/0 becomes the bottleneck.
+When coupled with the advantages of a distributed FS, the ability to
+both distribute compute and data provides additional performance
+advantage, as shown by the values of $T_c$ for both distributed
+compute and DFS cases in Table~\ref{exp4and5}.
+
+\begin{table}
+\upp
+\begin{tabular}{ccccc}
+ \hline
+ \multicolumn{2}{c}{Configuration} & data size & work-load/worker & $T_c$ \\
+
+ compute & data & (GB) & (GB/W) & (sec) \\
+ \hline
+% local & 1 & 0.5 & 372 \\
+% \hline
+% distributed & 1 & 0.25 & 372 \\
+% \hline \hline
+ local & local-FS & 1 & 0.1 & 466 \\
+ \hline
+ distributed & local-FS & 1 & 0.1 & 320 \\
+ \hline
+ distributed & DFS & 1 & 0.1 & 273.55 \\
+ \hline \hline
+ local & local-FS & 2 & 0.25 & 673 \\
+ \hline
+ distributed & local-FS & 2 & 0.25 & 493 \\
+ \hline
+ distributed & DFS & 2 & 0.25 & 466 \\
+ \hline \hline
+ local & local-FS & 4 & 0.5 & 1083\\
+ \hline
+ distributed & local-FS & 4 & 0.5& 912 \\
+ \hline
+ distributed & DFS & 4 & 0.5 & 848 \\
+ \hline \hline
+\end{tabular}
+\upp
+\caption{Table showing \tc for different configurations of compute
+ and data. The two compute configurations correspond to the situation
+ where all workers are either
+ placed locally or workers are distributed across two different resources. The data configurations arise when using a single local FS or a distributed FS (KFS) with 2 data-servers. It is evident from performance figures that an optimal value arises when distributing both data and compute.} \label{exp4and5}
+\upp
+\upp
+\end{table}
+
+{\bf SAGA-MapReduce on Clouds: } Thanks to the low overhead of
+developing adaptors, SAGA has been deployed on three Cloud Systems --
+Amazon, Nimbus~\cite{nimbus} and Eucalyptus~\cite{eucalyptus} (we have
+a local installation of Eucalyptus, referred to as GumboCloud). On
+EC2, we created a custom virtual machine (VM) image with preinstalled
+SAGA. For Eucalyptus and Nimbus, a bootstrapping script equips a
+standard VM instance with SAGA, and SAGA's prerequisites (mainly
+boost). To us, a mixed approach seemed most favourable, where the
+bulk software installation is statically done via a custom VM image,
+but software configuration and application deployment are done
+dynamically during VM startup.
+
+There are several aspects to Cloud Interoperability. A simple form of
+interoperability -- more akin to inter-changeability -- is that any
+application can use any of the three Cloud systems without any
+changes to the application: the application simply needs to
+instantiate a different set of security credentials for the respective
+runtime environment, aka cloud. Interestingly, SAGA provides this level of
+interoperability quite trivially thanks to the adaptors.
+
+By almost trivial extension, SAGA also provides Grid-Cloud
+interoperability, as shown in Figs.~\ref{gramjob} and~\ref{vmjob},
+where exactly the same interface and functional calls lead to job
+submission on Grids or on Clouds. Although syntactically identical,
+the semantics of the calls and back-end management are somewhat
+different. For example, for Grids, a \texttt{job\_service} instance
+represents a live job submission endpoint, whilst for Clouds it
+represents a VM instance created on the fly. It takes SAGA about 45
+seconds to instantiate a VM on Eucalyptus, and about 90 seconds on
+EC2. Once instantiated, it takes about 1 second to assign a job to a
+VM on Eucalyptus, or EC2. It is a configurable option to tie the VM
+lifetime to the \texttt{job\_service} object lifetime, or not.
+
+We have also deployed \sagamapreduce to work on Cloud platforms. It
+is critical to mention that the \sagamapreduce code did not undergo
+any changes whatsoever. The change lies in the run-time system and
+deployment architecture. For example, when running \sagamapreduce on
+EC2, the master process resides on one VM, while workers reside on
+different VMs. Depending on the available adaptors, Master and Worker
+can either perform local I/O on a global/distributed file system, or
+remote I/O on a remote, non-shared file system. In our current
+implementation, the VMs hosting the master and workers share the same
+ssh credentials and a shared file-system (using sshfs/FUSE).
+Application deployment and configuration (as discussed above) are also
+performed via that sshfs. Due to space limitations we will not
+discuss the performance data of \sagamapreduce with different data-set
+sizes and varying worker numbers.
+
+\begin{figure}[!ht]
+\upp
+ \begin{center}
+ \begin{mycode}[label=SAGA Job Launch via GRAM gatekeeper]
+ { // contact a GRAM gatekeeper
+ saga::job::service js;
+ saga::job::description jd;
+ jd.set_attribute (``Executable'', ``/tmp/my_prog'');
+ // translate job description to RSL
+ // submit RSL to gatekeeper, and obtain job handle
+ saga::job::job j = js.create_job (jd);
+ j.run ();
+ // watch handle until job is finished
+ j.wait ();
+ } // break contact to GRAM
+ \end{mycode}
+ \caption{\label{gramjob}Job launch via GRAM}
+ \end{center}
+\upp
+\end{figure}
+
+
+\begin{figure}[!ht]
+\upp
+ \begin{center}
+ \begin{mycode}[label=SAGA create a VM instance on a Cloud]
+ {// create a VM instance on Eucalyptus/Nimbus/EC2
+ saga::job::service js;
+ saga::job::description jd;
+ jd.set_attribute (``Executable'', ``/tmp/my_prog'');
+ // translate job description to ssh command
+ // run the ssh command on the VM
+ saga::job::job j = js.create_job (jd);
+ j.run ();
+ // watch command until done
+ j.wait ();
+ } // shut down VM instance
+ \end{mycode}
+ \caption{\label{vmjob} Job launch via VM}
+ \end{center}
+\upp
+\end{figure}
+
+\section{Conclusion}
+We have demonstrated the power of SAGA as a programming interface and
+as a mechanism for codifying computational patterns, such as MapReduce
+and All-Pairs. Patterns capture a dominant and recurring
+computational mode; by providing explicit support for such patterns,
+end-users and domain scientists can reformulate their scientific
+problems/applications so as to use these patterns. % For example, we
+% have shown how traditional applications such as MSA and Gene Search
+% can be implemented using the All-Pairs and MapReduce patterns.
+This
+provides further motivation for abstractions at multiple-levels.
+%support basic functionality but also data-intensive patterns.
+We have shown the power of abstractions for data-intensive computing
+% patterns and
+% abstractions
+% that support such patterns,
+by demonstrating how SAGA, whilst providing the required controls and
+supporting relevant programming models, can decouple the development
+of applications from the deployment and details of the run-time
+environment.
+
+\section{Acknowledgments}
+
+SJ acknowledges UK EPSRC grant number GR/D0766171/1 for supporting
+SAGA and the e-Science Institute, Edinburgh for the research theme,
+``Distributed Programming Abstractions''. This work would not have
+been possible without the efforts and support of other members of the
+SAGA team. In particular, \sagamapreduce was written by Chris and
+Michael Miceli with assistance from Hartmut Kaiser. We also
+acknowledge internal resources of the Center for Computation \&
+Technology (CCT) at LSU and computer resources provided by LONI.
+\bibliographystyle{plain} \bibliography{saga_data_intensive}
+\end{document}
+
+\jhanote{traditional algorithms are OK when data are limited.. but
+ data requirements can change in different ways.. not only will data
+ be greater, but also after a point will also be distributed. This is
+ indicative of the data-gathering process (multiple-resources,
+ replicated?) also indicative of the fact that often data-collection
+ is growing greater than data-analysis (for example exabyte data last
+ year). Finally, with data-distributed, comes the following (at
+ least challenges): do we move data-to-compute, or compute-to-data,
+ is there a transition point, and can the same application be written
+ to support both modes?}
+
+\jhanote{From above, we need to write what has been traditionally
+ non-distributed applications, in a distributed fashion. General
+ philosophy on how we write distributed applications... One approach
+ is to use infrastructure independent frameworks.}
+
+\jhanote{Motivation for why this work is important. Applications
+ are developed with specific infrastructure in mind. So in a
+ way are limited by the infrastructure.. so whereas applications
+ can often be written to scale, infrastructure doesn't
+ always..}
+
+\jhanote{This is a new way of developing applications using high-level
+ abstractions. These high-level abstractions in turn provide ways to
+ encode/support certain patterns, in this case data-parallel/access
+ patterns. In turn the abstractions are infrastructure independent.}
+
+
+\jhanote{Outline the work in this paper. New concept. Thus one
+ important requirement is to determine: \\
+ i) how these patterns work for real scientific applications, \\
+ ii) how well the general-implementations of these patterns work with
+ respect to native implementations of these patterns \\
+ iii) combination of i and ii, i.e. how well these applications
+ behave when encoded using these high-level abstractions
+ in comparison on general purpose infrastructure\\
+ The aim is not to report better or even equivalent performance of
+ these ``generalized'' applications compared to the native
+ implementation of these applications..}
+
+
+
+\begin{table}
+\upp
+\begin{tabular}{cccc}
+ \hline
+ Configuration & data size & work-load/worker & $T_c$ \\
+ \hline
+% {\multicolumn{2}{c|c} compute & data} & (GB) & (GB/W) & (sec) \\
+ compute \& data & (GB) & (GB/W) & (sec) \\
+ \hline
+% local & 1 & 0.5 & 372 \\
+% \hline
+% distributed & 1 & 0.25 & 372 \\
+% \hline \hline
+ local \& local-FS & 1 & 0.1 & 466 \\
+ \hline
+ distributed \& local-FS & 1 & 0.1 & 320 \\
+ \hline
+ distributed \& KFS & 1 & 0.1 & 273.55 \\
+ \hline \hline
+ local \& local-FS & 2 & 0.25 & 673 \\
+ \hline
+ distributed \& local-FS & 2 & 0.25 & 493 \\
+ \hline
+ distributed \& KFS & 2 & 0.25 & 466 \\
+ \hline \hline
+ local \& local-FS & 4 & 0.5 & 1083\\
+ \hline
+ distributed \& local-FS & 4 & 0.5& 912 \\
+ \hline
+ distributed \& KFS & 4 & 0.5 & 848 \\
+ \hline \hline
+\end{tabular}
+\upp
+\caption{Table showing \tc for different configurations of compute
+ and data. The two compute configurations correspond to the situation
+ where all workers are either
+ placed locally or workers are distributed across two different resources. The data configurations arise when using a single local FS or a distributed FS (KFS) with 2 data-servers. It is evident from performance figures that an optimal value arises when distributing both data and compute.}\label{exp4and5}
+\upp
+\upp
+\end{table}
\ No newline at end of file
File [added]: saga_data_intensive.bib
Delta lines: +6767 -0
===================================================================
--- papers/clouds/saga_data_intensive.bib 2009-01-11 00:28:41 UTC (rev 837)
+++ papers/clouds/saga_data_intensive.bib 2009-01-12 19:46:36 UTC (rev 838)
@@ -0,0 +1,6767 @@
+ at misc{saga-req,
+ Author = {Andre Merzky and Shantenu Jha},
+ Date-Added = {2008-02-24 20:36:03 -0600},
+ Date-Modified = {2008-02-24 20:36:03 -0600},
+ Howpublished = {Grid Forum Document GFD.71},
+ Note = {Global Grid Forum},
+ Title = {{A Requirements Analysis for a Simple API for Grid Applications}},
+ Year = 2006}
+
+ at misc{saga_gfd90,
+author={Tom Goodale and Shantenu Jha and Hartmut Kaiser and Thilo Kielmann and Pascal Kleijer and Andre Merzky and John Shalf and Christopher Smith},
+title="{A Simple API for Grid Applications (SAGA)}",
+howpublished = {OGF Document Series 90,
+http://www.ogf.org/documents/GFD.90.pdf}}
+
+ at misc{saga-uc,
+ Author = {Andre Merzky and Shantenu Jha},
+ Date-Added = {2008-02-24 20:35:56 -0600},
+ Date-Modified = {2008-02-24 20:35:56 -0600},
+ Howpublished = {Grid Forum Document GFD.70},
+ Note = {Global Grid Forum},
+ Title = {{A Collection of Use Cases for a Simple API for Grid Applications}},
+ Year = 2006}
+
+ at inproceedings{mapreduce-paper,
+ Address = {Berkeley, CA, USA},
+ Author = {Jeffrey Dean and Sanjay Ghemawat},
+ Booktitle = {OSDI'04: Proceedings of the 6th conference on Symposium on Operating Systems Design \& Implementation},
+ Date-Added = {2008-02-23 20:47:01 -0600},
+ Date-Modified = {2008-05-12 23:43:10 +0200},
+ Location = {San Francisco, CA},
+ Pages = {137--150},
+ OPTPublisher = {USENIX Association},
+ Title = {{MapReduce: Simplified Data Processing on Large Clusters}},
+ Year = {2004}}
+
+ at misc{allpairs, note = {All-Pairs: An Abstraction for Data Intensive Cloud Computing, Christopher Moretti, Jared Bulosan, Douglas Thain, and Patrick Flynn, IEEE International Parallel and Distributed Processing Symposium (IPDPS), April 2008.}}
+
+ at misc{allpairs_short, note = {All-Pairs: An Abstraction for Data Intensive Cloud Computing, Christopher Moretti et al, IEEE IPDPS, 2008.}}
+
+ at misc{bigtable, note = {Bigtable: A Distributed Storage System for Structured Data, Fay Chang, Jeffrey Dean, Sanjay Ghemawat, Wilson C. Hsieh, Deborah A. Wallach, Mike Burrows, Tushar Chandra, Andrew Fikes, and Robert E. Gruber, OSDI'06: Seventh Symposium on Operating System Design and Implementation, Seattle, WA, November, 2006.}}
+
+ at misc{bigtable_small, note = {Bigtable: A Distributed Storage System for Structured Data, Fay Chang et al., OSDI'06: 7th Symp. on Operating System Design and Implementation, Nov 06}}
+
+ at article{sagastuff,
+ Author = {Goodale, Tom and Jha, Shantenu and Kaiser, Hartmut
+ and Kielmann, Thilo and Kleijer, Pascal and von Laszewski,
+ Gregor and Lee, Craig and Merzky, Andre and Rajic, Hrabri and
+ Shalf, John}, Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100}, Journal =
+ {Computational Methods in Science and Technology}, Number =
+ {1}, Pages = {7-20}, Title = {{SAGA: A Simple API for Grid
+ Applications, High-Level Application Programming on the
+ Grid}}, Url =
+ {http://saga.cct.lsu.edu/publications/saga_paper-a_simple_api_for_grid_applications_sc05.pdf},
+ Volume = {12}, Year = {2006}, Bdsk-Url-1 =
+ {http://saga.cct.lsu.edu/publications/saga_paper-a_simple_api_for_grid_applications_sc05.pdf}}
+
+ at inproceedings{saga_gin,
+ Address = {Washington, DC, USA},
+ Author = {Shantenu Jha and Hartmut Kaiser and Andre Merzky and
+ Ole Weidner},
+ Booktitle = {E-SCIENCE '07: Proceedings of the Third IEEE
+ International Conference on e-Science and Grid
+ Computing (e-Science 2007)},
+ Date-Added = {2008-02-16 18:54:59 -0600},
+ Date-Modified = {2008-02-16 18:54:59 -0600},
+ Doi = {http://dx.doi.org/10.1109/E-SCIENCE.2007.39},
+ Isbn = {0-7695-3064-8},
+ Pages = {584--591},
+ Publisher = {IEEE Computer Society},
+ Title = {Grid Interoperability at the Application Level Using SAGA},
+ Year = {2007},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/E-SCIENCE.2007.39}}
+
+ at inproceedings{saga_escience07,
+ Author = {Jha, Shantenu and Kaiser, Hartmut and El Khamra,
+ Yaakoub and Weidner, Ole},
+ Booktitle = {Accepted for 3rd IEEE Conference on eScience2007 and Grid Computing, Bangalore, India.},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Title = {{Design and Implementation of Network Performance
+ Aware Applications Using SAGA and Cactus}},
+ Url = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf}}
+
+ at inproceedings{saga_escience07_short,
+ Author = {S Jha et al},
+ Booktitle = {Accepted for 3rd IEEE Conference on eScience2007 and Grid Computing, Bangalore, India.},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Title = {{Design and Implementation of Network Performance
+ Aware Applications Using SAGA and Cactus}},
+ Url = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf}}
+
+
+ at misc{saga_tg08, note = {Developing Large-Scale Adaptive Scientific
+Applications with Hard to Predict Runtime Resource Requirements, {\it
+Proceedings of TeraGrid08}, available at http://tinyurl.com/5du32j}}
+
+
+ at misc{saga_mapreduce, note={Exposing the Power of Google through SAGA, {\it Google Summer of Code} http://www.omii.ac.uk/wiki/MPGoogleSAGA}}
+
+ at Article{SPdynamics,
+ author = {Shirts, Michael R. and Pande, Vijay S.},
+ OPTtitle = {{Mathematical Analysis of Coupled Parallel Simulations}},
+ journal = {Phys. Rev. Lett.},
+ volume = {86},
+ number = {22},
+ pages = {4983--4987},
+ numpages = {4},
+ year = {2001},
+ month = {May},
+ doi = {10.1103/PhysRevLett.86.4983},
+ publisher = {American Physical Society}
+}
+
+ at article{pande_bj03,
+author = {Rhee, Young Min and Pande, Vijay S.},
+title = {{Multiplexed-Replica Exchange Molecular Dynamics Method for Protein Folding Simulation}},
+journal = {Biophys. J.},
+volume = {84},
+number = {2},
+pages = {775-786},
+year = {2003},
+OPTURL = {http://www.biophysj.org/cgi/content/abstract/84/2/775},
+OPTeprint = {http://www.biophysj.org/cgi/reprint/84/2/775.pdf} }
+
+
+
+ at misc{dpa-paper, note = {S. Jha et al., {\em Programming Abstractions
+ for Large-scale Distributed Applications}, to be
+ submitted to ACM Computing Surveys; draft at \url{http://www.cct.lsu.edu/~sjha/publications/dpa_surveypaper.pdf}}}
+
+ at misc{remd-manager_url, note={https://svn.cct.lsu.edu/repos/saga-projects/applications/REMDgManager/src/main.py}}
+
+ at misc{saga_url, note={{http://saga.cct.lsu.edu}}}
+
+ at misc{glite, note={{http://glite.web.cern.ch/glite/}}}
+
+ at inproceedings{escience07,
+ author = {Shantenu Jha and Hartmut Kaiser and Yaakoub El Khamra and Ole Weidner},
+ title = {{Design and Implementation of Network Performance Aware Applications Using SAGA and Cactus}},
+ booktitle = {E-SCIENCE '07: Proceedings of the Third IEEE International Conference on e-Science and Grid Computing},
+ year = {2007},
+ isbn = {0-7695-3064-8},
+ pages = {143--150},
+ doi = {http://dx.doi.org/10.1109/E-SCIENCE.2007.28},
+ OPTpublisher = {IEEE Computer Society},
+ OPTaddress = {Washington, DC, USA},
+ }
+
+%% This BibTeX bibliography file was created using BibDesk.
+%% http://bibdesk.sourceforge.net/
+
+
+%% Created for luckow at 2008-08-09 21:04:53 +0200
+
+
+%% Saved with string encoding Unicode (UTF-8)
+
+
+ at preamble{"\providecommand{\noopsort}[1]{}"}
+
+
+ at misc{loni,
+ Date-Added = {2008-08-11 00:52:45 +0200},
+ Date-Modified = {2008-08-11 00:55:00 +0200},
+ Howpublished = {\url{http://www.loni.org}},
+ Title = {{LONI: Louisiana Optical Network Initiative}}
+}
+
+ at inproceedings{1362680,
+ Address = {New York, NY, USA},
+ Author = {Ioan Raicu and Yong Zhao and Catalin Dumitrescu and Ian Foster and Mike Wilde},
+ Booktitle = {SC '07: Proceedings of the 2007 ACM/IEEE conference on Supercomputing},
+ Date-Added = {2008-08-09 21:04:33 +0200},
+ Date-Modified = {2008-08-09 21:04:53 +0200},
+ Doi = {http://doi.acm.org/10.1145/1362622.1362680},
+ Isbn = {978-1-59593-764-3},
+ Location = {Reno, Nevada},
+ Pages = {1--12},
+ Publisher = {ACM},
+ Title = {{Falkon: A Fast and Light-Weight TasK ExecutiON Framework}},
+ Year = {2007},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/1362622.1362680}}
+
+ at article{Phillips:2005gd,
+ Author = {J. Phillips and R. Braun and W. Wang and J. Gumbart and E. Tajkhorshid and E. Villa and C. Chipot and R. Skeel and L. Kale and K. Schulten},
+ Date-Added = {2008-08-08 22:10:20 +0200},
+ Date-Modified = {2008-08-08 22:12:28 +0200},
+ Journal = {Journal of Computational Chemistry},
+ Pages = {1781-1802},
+ Title = {{Scalable molecular dynamics with NAMD}},
+ Volume = {26},
+ Year = {2005}}
+
+ at article{Vadhiyar:2005:SAG,
+ Address = {Chichester, UK, UK},
+ Author = {Sathish S. Vadhiyar and Jack J. Dongarra},
+ Date-Added = {2008-08-02 17:55:25 +0200},
+ Date-Modified = {2008-08-02 17:56:08 +0200},
+ Doi = {http://dx.doi.org/10.1002/cpe.v17:2/4},
+ Issn = {1532-0626},
+ Journal = {Concurrency and Computation: Practice and Experience},
+ Number = {2-4},
+ Pages = {235--257},
+ Publisher = {John Wiley and Sons Ltd.},
+ Title = {{Self adaptivity in Grid computing}},
+ Volume = {17},
+ Year = {2005},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1002/cpe.v17:2/4}}
+
+ at inproceedings{1033223,
+ Address = {Washington, DC, USA},
+ Author = {D. Anderson},
+ Booktitle = {GRID '04: Proceedings of the Fifth IEEE/ACM International Workshop on Grid Computing},
+ Date-Added = {2008-07-30 19:58:37 +0200},
+ Date-Modified = {2008-07-30 19:58:46 +0200},
+ Doi = {http://dx.doi.org/10.1109/GRID.2004.14},
+ Isbn = {0-7695-2256-4},
+ Pages = {4--10},
+ Publisher = {IEEE Computer Society},
+ Title = {{BOINC: A System for Public-Resource Computing and Storage}},
+ Year = {2004},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/GRID.2004.14}}
+
+ at misc{wisdom,
+ Date-Added = {2008-07-30 19:45:33 +0200},
+ Date-Modified = {2008-07-30 19:46:18 +0200},
+ Howpublished = {\url{http://wisdom.eu-egee.fr/}},
+ Title = {{WISDOM -- Initiative for Grid-Enabled Drug Discovery Against Neglected and Emergent Diseases}}}
+
+ at misc{folding,
+ Date-Added = {2008-07-30 19:44:40 +0200},
+ Date-Modified = {2008-07-30 19:45:06 +0200},
+ Howpublished = {\url{http://folding.stanford.edu/}},
+ Title = {{Folding at Home}}}
+
+ at article{PhysRevLett.86.4983,
+ Author = {Shirts, Michael R. and Pande, Vijay S.},
+ Date-Added = {2008-07-30 19:28:02 +0200},
+ Date-Modified = {2008-07-30 19:28:41 +0200},
+ Doi = {10.1103/PhysRevLett.86.4983},
+ Journal = {Physical Review Letters},
+ Month = {May},
+ Number = {22},
+ Numpages = {4},
+ Pages = {4983--4987},
+ Publisher = {American Physical Society},
+ Title = {{Mathematical Analysis of Coupled Parallel Simulations}},
+ Volume = {86},
+ Year = {2001},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1103/PhysRevLett.86.4983}}
+
+ at misc{jni,
+ Date-Added = {2008-07-20 23:29:38 +0200},
+ Date-Modified = {2008-07-20 23:36:14 +0200},
+ Howpublished = {\url{http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/jniTOC.html}},
+ Title = {{Java Native Interface Specification}},
+ Year = {2003}}
+
+ at misc{jee,
+ Date-Added = {2008-07-20 13:57:09 +0200},
+ Date-Modified = {2008-07-20 13:58:32 +0200},
+ Howpublished = {\url{http://jcp.org/en/jsr/detail?id=244}},
+ Key = {jee java},
+ Title = {{JSR 244: JavaTM Platform, Enterprise Edition 5 (Java EE 5) Specification}},
+ Year = {2008}}
+
+ at misc{jaxws,
+ Date-Added = {2008-07-20 13:27:27 +0200},
+ Date-Modified = {2008-07-20 13:58:16 +0200},
+ Howpublished = {\url{http://jcp.org/en/jsr/detail?id=224}},
+ Title = {{JSR 224: Java API for XML-Based Web Services (JAX-WS) 2.0}},
+ Year = {2007}}
+
+ at misc{stax,
+ Date-Added = {2008-07-20 13:26:29 +0200},
+ Date-Modified = {2008-07-20 13:58:25 +0200},
+ Howpublished = {\url{http://jcp.org/en/jsr/detail?id=173}},
+ Key = {stax},
+ Title = {{JSR 173: Streaming API for XML}},
+ Year = {2007}}
+
+ at misc{axis2,
+ Date-Added = {2008-07-20 13:20:40 +0200},
+ Date-Modified = {2008-07-20 13:20:59 +0200},
+ Howpublished = {\url{http://ws.apache.org/axis2/}},
+ Key = {axis},
+ Title = {{Axis2 Web Services}},
+ Year = {2008}}
+
+ at misc{hibernate,
+ Key = {hibernate},
+ Title = {{Hibernate Homepage}},
+ Howpublished = {\url{http://www.hibernate.org/}},
+ Year = {2008},
+ Date-Added = {2008-07-19 23:26:38 +0200},
+ Date-Modified = {2008-07-19 23:27:18 +0200}}
+
+ at book{Gamma:1995:DPE,
+ Author = {Gamma, E. and Helm, R. and Johnson, R. and Vlissides, J.},
+ Title = {{Design Patterns: Elements of Reusable Object-Oriented Software}},
+ Publisher = {Addison-Wesley Professional},
+ Year = {1995},
+ Keywords = {design patterns},
+ Date-Added = {2008-07-19 11:50:38 +0200},
+ Date-Modified = {2008-07-19 11:51:34 +0200}}
+
+ at misc{Rotem-Gal-Oz:2006gd,
+ Author = {Arnon Rotem-Gal-Oz },
+ Title = {{Fallacies of Distributed Computing Explained}},
+ Howpublished = {\url{http://www.rgoarchitects.com/Files/fallacies.pdf}},
+ Year = {2006},
+ Date-Added = {2008-07-18 19:38:46 +0200},
+ Date-Modified = {2008-07-18 21:43:07 +0200}}
+
+ at misc{w3c,
+ Date-Added = {2008-07-17 22:07:12 +0200},
+ Date-Modified = {2008-07-17 22:08:17 +0200},
+ Howpublished = {\url{http://www.w3.org/}},
+ Title = {{World Wide Web Consortium (W3C)}},
+ Year = {2008}}
+
+ at inproceedings{Luckow:2008xy,
+ Author = {A. Luckow and S. Jha and A. Merzky and B. Schnor and J. Kim},
+ Title = {{Reliable Replica Exchange Molecular Dynamics Simulation in the Grid using SAGA CPR and Migol}},
+ Booktitle = {Proceedings of UK e-Science 2008 All Hands Meeting },
+ Address = {Edinburgh, UK},
+ Year = {2008},
+ Date-Added = {2008-07-07 21:10:42 +0200},
+ Date-Modified = {2008-07-08 20:42:22 +0200}}
+
+ at misc{wincluster08,
+ Title = {{Windows 2003 Server Cluster}},
+ Howpublished = {\url{http://www.microsoft.com/windowsserver2003/enterprise/clustering.mspx}},
+ Year = {2008},
+ Date-Added = {2008-06-30 23:31:37 +0200},
+ Date-Modified = {2008-07-20 13:35:25 +0200}}
+
+ at misc{ohac,
+ Title = {{Open High Availability Cluster}},
+ Howpublished = {\url{http://www.opensolaris.org/os/community/ha-clusters/ohac/}},
+ Year = {2008},
+ Date-Added = {2008-06-30 23:29:06 +0200},
+ Date-Modified = {2008-07-20 13:34:57 +0200}}
+
+ at inproceedings{10.1109/CLUSTR.2001.959986,
+ Address = {Los Alamitos, CA, USA},
+ Author = {P. Papadopoulos and M. Katz and G. Bruno},
+ Booktitle = {Proceedings of the 2001 IEEE International Conference on Cluster Computing (CLUSTER'01)},
+ Date-Added = {2008-06-30 22:31:58 +0200},
+ Date-Modified = {2008-06-30 22:32:16 +0200},
+ Doi = {10.1109/CLUSTR.2001.959986},
+ Isbn = {0-7695-1116-3},
+ Pages = {258},
+ Publisher = {IEEE Computer Society},
+ Title = {{NPACI Rocks: Tools and Techniques for Easily Deploying Manageable Linux Clusters}},
+ Year = {2001},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/CLUSTR.2001.959986}}
+
+ at inproceedings{1268399,
+ Address = {Berkeley, CA, USA},
+ Author = {A. Robertson},
+ Booktitle = {ALS'00: Proceedings of the 4th Annual Linux Showcase \& Conference},
+ Date-Added = {2008-06-30 21:46:20 +0200},
+ Date-Modified = {2008-06-30 21:46:40 +0200},
+ Location = {Atlanta, Georgia},
+ Pages = {20},
+ Publisher = {USENIX Association},
+ Title = {{Linux-HA Heartbeat System Design}},
+ Year = {2000}}
+
+ at inproceedings{10.1109/ICWS.2007.57,
+ Address = {Los Alamitos, CA, USA},
+ Author = {J. Osrael and L. Froihofer and M. Weghofer and K. Goeschka},
+ Booktitle = {Proceedings of the IEEE International Conference on Web Services (ICWS 2007)},
+ Date-Added = {2008-06-30 19:25:53 +0200},
+ Date-Modified = {2008-06-30 19:26:21 +0200},
+ Doi = {10.1109/ICWS.2007.57},
+ Isbn = {0-7695-2924-0},
+ Pages = {591--598},
+ Publisher = {IEEE Computer Society},
+ Title = {{Axis2-Based Replication Middleware for Web Services}},
+ Year = {2007},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/ICWS.2007.57}}
+
+ at inproceedings{10.1109/ICDSC.2001.919005,
+ Address = {Los Alamitos, CA, USA},
+ Author = {H. Miranda and A. Pinto and L. Rodrigues},
+ Booktitle = {21st IEEE International Conference on Distributed Computing Systems (ICDCS'01)},
+ Date-Added = {2008-06-30 17:49:02 +0200},
+ Date-Modified = {2008-06-30 17:49:37 +0200},
+ Doi = {10.1109/ICDSC.2001.919005},
+ Isbn = {0-7695-1077-9},
+ Pages = {707},
+ Publisher = {IEEE Computer Society},
+ Title = {{Appia: A Flexible Protocol Kernel Supporting Multiple Coordinated Channels}},
+ Year = {2001},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/ICDSC.2001.919005}}
+
+ at techreport{867060,
+ Address = {Ithaca, NY, USA},
+ Author = {K. Birman and R. Constable and M. Hayden and J. Hickey and C. Kreitz and R. Van Renesse and O. Rodeh and W. Vogels},
+ Date-Added = {2008-06-30 13:41:55 +0200},
+ Date-Modified = {2008-06-30 13:42:33 +0200},
+ Institution = {Cornell University},
+ Number = {TR99-1774},
+ Source = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Acornellcs%3ACORNELLCS%3ATR99-1774},
+ Title = {{The Horus and Ensemble Projects: Accomplishments and Limitations}},
+ Year = {1999}}
+
+ at phdthesis{Hayden:1998tw,
+ Author = {M. Hayden},
+ Title = {{The Ensemble System}},
+ School = {Department of Computer Science, Cornell University},
+ Year = {1998},
+ Howpublished = {\url{http://www.cs.cornell.edu/Info/Projects/NuPrl/documents/hayden/ensemblesystem.pdf}},
+ Date-Added = {2008-06-30 13:37:59 +0200},
+ Date-Modified = {2008-06-30 13:39:01 +0200}}
+
+ at inproceedings{737773,
+ Author = {Y. Amir and C. Danilov and J. Stanton},
+ Title = {{A Low Latency, Loss Tolerant Architecture and Protocol for Wide Area Group Communication}},
+ Booktitle = {DSN '00: Proceedings of the 2000 International Conference on Dependable Systems and Networks (formerly FTCS-30 and DCCA-8)},
+ Pages = {327--336},
+ Publisher = {IEEE Computer Society},
+ Address = {Washington, DC, USA},
+ Year = {2000},
+ Isbn = {0-7695-0707-7},
+ Date-Added = {2008-06-30 13:20:53 +0200},
+ Date-Modified = {2008-06-30 13:21:11 +0200}}
+
+ at article{10.1109/TC.2004.1275293,
+ Address = {Los Alamitos, CA, USA},
+ Author = {P. Felber and P. Narasimhan},
+ Date-Added = {2008-06-29 23:54:31 +0200},
+ Date-Modified = {2008-06-29 23:54:44 +0200},
+ Doi = {10.1109/TC.2004.1275293},
+ Issn = {0018-9340},
+ Journal = {IEEE Transactions on Computers},
+ Number = {5},
+ Pages = {497--511},
+ Publisher = {IEEE Computer Society},
+ Title = {{Experiences, Strategies, and Challenges in Building Fault-Tolerant CORBA Systems}},
+ Volume = {53},
+ Year = {2004},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/TC.2004.1275293}}
+
+ at phdthesis{932439,
+ Author = {P. Narasimhan},
+ Date-Added = {2008-06-29 23:43:37 +0200},
+ Date-Modified = {2008-06-29 23:43:55 +0200},
+ Isbn = {0-599-94630-X},
+ Note = {Chair-Louise E. Moser},
+ Order_No = {AAI9987908},
+ School = {University of California, Santa Barbara},
+ Title = {{Transparent Fault Tolerance for CORBA}},
+ Year = {1999}}
+
+ at inproceedings{1268110,
+ Address = {Berkeley, CA, USA},
+ Author = {S. Maffeis},
+ Booktitle = {COOTS'95: Proceedings of the USENIX Conference on Object-Oriented Technologies (COOTS)},
+ Date-Added = {2008-06-29 22:46:32 +0200},
+ Date-Modified = {2008-06-29 22:46:55 +0200},
+ Location = {Monterey, California},
+ Pages = {10},
+ Publisher = {USENIX Association},
+ Title = {{Adding Group Communication and Fault-Tolerance to CORBA}},
+ Year = {1995}}
+
+ at misc{OMG:2008zh,
+ Author = {OMG},
+ Title = {{Common Object Request Broker Architecture (CORBA) Specification, Version 3.1}},
+ Howpublished = {\url{http://www.omg.org/spec/CORBA/3.1/}},
+ Year = {2008},
+ Date-Added = {2008-06-29 21:28:06 +0200},
+ Date-Modified = {2008-06-29 21:29:36 +0200}}
+
+ at misc{XSSTS06,
+ Abstract = {The Endpoint Handlespace Redundancy Protocol (ENRP) is
+designed to work in conjunction with the Aggregate Server Access
+Protocol (ASAP) to accomplish the functionality of the Reliable
+Server Pooling (RSerPool) requirements and architecture. Within the
+operational scope of RSerPool, ENRP defines the procedures and
+message formats of a distributed, fault-tolerant registry service for
+storing, bookkeeping, retrieving, and distributing pool operation and
+membership information.},
+ Author = {Q. Xie and R. Stewart and M. Stillman and M. T{\"u}xen and A. Silverton},
+ Date-Added = {2008-06-29 17:24:48 +0200},
+ Date-Modified = {2008-07-20 14:21:07 +0200},
+ Howpublished = {{Technical Report, Internet-Draft Version 17, IETF, RSerPool Working Group}},
+ Html = {http://www.ietf.org/internet-drafts/draft-ietf-rserpool-enrp-17.txt},
+ Key = {SCTP},
+ Month = sep,
+ Title = {{Endpoint Handlespace Redundancy Protocol (ENRP)}},
+ Year = {2007}}
+
+ at inproceedings{dreibholz05implementing,
+ Abstract = {The Reliable Server Pooling~(RSerPool) protocol
+suite currently under standardization by the IETF is designed to build
+systems providing highly available services by mechanisms and protocols
+for establishing, configuring, accessing and monitoring pools of server
+resources. But RSerPool is not only able to manage pools of redundant
+servers and facilitate service failover between servers: it also
+includes sophisticated mechanisms for server selections within the
+pools. These mechanisms make RSerPool useful for the application in
+load balancing and distributed computing scenarios. As part of our
+RSerPool research and to verify results of our simulation model in
+real-life scenarios, we have created a complete implementation
+prototype of the RSerPool framework. In this paper, we will give a
+detailed description of the concepts, ideas and realizations of our
+prototype. Furthermore, we will show performance issues raised by the
+management of large servers pools, as it is necessary for load
+balancing or distributed computing scenarios. We will explain the
+algorithms and data structures we designed to solve these challenges
+and finally present a rough performance evaluation that verifies our
+concept.},
+ Address = {Zagreb/Croatia},
+ Author = {T.~Dreibholz and E.~P.~Rathgeb},
+ Booktitle = {Proceedings of the 8th IEEE International Conference on Telecommunications},
+ Date-Added = {2008-06-29 17:24:12 +0200},
+ Date-Modified = {2008-06-29 17:24:12 +0200},
+ Isbn = {953-184-081-4},
+ Month = jun,
+ Pages = {21--28},
+ Title = {{Implementing the Reliable Server Pooling Framework}},
+ Url = {http://tdrwww.iem.uni-due.de/dreibholz/rserpool/rserpool-publications/Contel2005.pdf},
+ Volume = {1},
+ Year = {2005},
+ Bdsk-Url-1 = {http://tdrwww.iem.uni-due.de/dreibholz/rserpool/rserpool-publications/Contel2005.pdf}}
+
+ at inproceedings{dreibholz-efficient,
+ Abstract = {Many Internet services require high availability. Server
+pooling provides a high availability solution using redundant servers.
+If one server fails, the service is continued by another one. A
+challenge for server pooling is efficient state sharing: The new server
+requires the old one's state to continue service. This paper proposes a
+simple, efficient and scalable solution, usable for a large subset of
+applications.},
+ Address = {Tampa, Florida/U.S.A.},
+ Author = {T. Dreibholz},
+ Booktitle = {Proceedings of the 27th Local Computer Networks Conference},
+ Date-Added = {2008-06-29 17:22:00 +0200},
+ Date-Modified = {2008-06-29 17:22:29 +0200},
+ Month = oct,
+ Title = {{An Efficient Approach for State Sharing in Server Pools}},
+ Url = {http://citeseer.ist.psu.edu/article/dreibholz02efficient.html},
+ Year = {2002},
+ Bdsk-Url-1 = {http://citeseer.ist.psu.edu/article/dreibholz02efficient.html}}
+
+ at techreport{SXST06,
+ Abstract = {Aggregate Server Access Protocol (ASAP) in conjunction with the
+ Endpoint Handlespace Redundancy Protocol (ENRP) [9] provides a high
+ availability data transfer mechanism over IP networks. ASAP uses a
+ handle-based addressing model which isolates a logical communication
+ endpoint from its IP address(es), thus effectively eliminating the
+ binding between the communication endpoint and its physical IP
+ address(es) which normally constitutes a single point of failure.
+
+ In addition, ASAP defines each logical communication destination as a
+ pool, providing full transparent support for server-pooling and load
+ sharing. It also allows dynamic system scalability - members of a
+ server pool can be added or removed at any time without interrupting
+ the service.
+
+ ASAP is designed to take full advantage of the network level
+ redundancy provided by the Stream Transmission Control Protocol
+ (SCTP) RFC2960 [3]. Each transport protocol, other than SCTP, MUST
+ have an accompanying transport mapping document. It should be noted
+ that ASAP messages passed between PE's and ENRP servers MUST use the
+ SCTP transport protocol.
+
+ The high availability server pooling is gained by combining two
+ protocols, namely ASAP and ENRP, in which ASAP provides the user
+ interface for pool handle to address translation, load sharing
+ management, and fault management while ENRP defines the high
+ availability pool handle translation service.},
+ Author = {R. Stewart and Q. Xie and M. Stillman and M. T{\"u}xen},
+ Date-Added = {2008-06-29 16:18:54 +0200},
+ Date-Modified = {2008-06-29 16:18:59 +0200},
+ Howpublished = {Technical Report, Internet-Draft Version 17, IETF, RSerPool Working Group},
+ Html = {http://www.ietf.org/internet-drafts/draft-ietf-rserpool-asap-17.txt},
+ Institution = {IETF, RSerPool Working Group},
+ Key = {SCTP},
+ Month = sep,
+ Number = {draft-ietf-rserpool-asap-17},
+ Title = {{Aggregate Server Access Protocol (ASAP)}},
+ Type = {Internet-Draft},
+ Year = {2007}}
+
+ at phdthesis{J05,
+ Abstract = {The new Stream Control Transmission Protocol (SCTP) was originally standardised
+for the transport of signaling messages -- originating from the Common Channel
+Signaling System No.7 (SS7) -- over IP based networks. Nonetheless, SCTP
+is a general purpose IP-based reliable transport protocol which is connection oriented
+and offers message based data transfer. It supports multiple independent
+message streams and flexible data delivery mechanisms. In contrast to TCP,
+SCTP protocol endpoints support multiple addresses (multihoming), and therefore
+an endpoint may be reached via different and possibly redundant network paths.
+Several SCTP enhancements, e.g. for partially reliable message transfer [82], are
+currently being proposed, and it is conceivable that SCTP is a suitable transport
+protocol for many established and future applications [5, 20].
+In this thesis, the behaviour and performance of SCTP was investigated within
+different scenarios. A suite of tools were developed to this end: in a testbed
+environment, a Unix-based protocol implementation named sctplib was created
+for demonstrating SCTP fairness towards TCP, and the applicability of SCTP for
+signaling transport was investigated.
+Discrete event-based simulation models of the SCTP data path and some control
+path elements were created and validated against the results from the testbed
+experiments. These models were later enhanced to investigate the simultaneous
+use of several available paths (also named load sharing). From an operator perspective,
+load sharing is desirable for signaling transport at high network loads,
+even though it requires substantial protocol adaptations. Load sharing mechanisms
+from the literature were evaluated, and a number of modifications of these
+algorithms were suggested and evaluated, as well.
+It could be shown that the modifications of the existing load sharing algorithms
+that were developed within the scope of this dissertation indeed lead to an
+optimisation of the load sharing algorithms so far proposed, both in terms of
+overall throughput and maximum message delay that can be expected.},
+ Author = {A. Jungmaier},
+ Date-Added = {2008-06-29 16:18:30 +0200},
+ Date-Modified = {2008-06-29 16:18:37 +0200},
+ School = {Universit{\"a}t Duisburg-Essen, Institut f{\"u}r Experimentelle Mathematik},
+ Key = {SCTP},
+ Month = aug,
+ Title = {{Das Transportprotokoll SCTP}},
+ Year = {2005}}
+
+ at techreport{SXMSSTRKZP00,
+ Abstract = {Stream Control Transmission Protocol [RFC2960] (SCTP) currently uses an
+ Adler-32 checksum. For small packets Adler-32 provides weak detection
+ of errors. This document changes that checksum and updates SCTP to
+ use a 32 bit CRC checksum.},
+ Author = {R. Stewart and Q. Xie and K. Morneault and C. Sharp and H. Schwarzbauer and T. Taylor and I. Rytina and M. Kalla and L. Zhang and V. Paxson},
+ Date-Added = {2008-06-29 16:17:48 +0200},
+ Date-Modified = {2008-06-29 17:47:39 +0200},
+ Howpublished = {Technical Report, Standards Track RFC 2960, Internet-Draft Version 02, IETF, RSerPool Working Group},
+ Html = {http://tools.ietf.org/html/draft-ietf-tsvwg-sctpcsum-07},
+ Institution = {IETF},
+ Key = {SCTP},
+ Month = oct,
+ Number = {2960},
+ Title = {{Stream Control Transmission Protocol}},
+ Type = {RFC},
+ Year = {2000}}
+
+ at techreport{TXSSLS06,
+ Abstract = {This document describes an architecture and protocols for the
+management and operation of server pools supporting highly reliable
+applications, and for client access mechanisms to a server pool.},
+ Author = {M. Tuexen and Q. Xie and R. Stewart and M. Shore and J. Loughney and A. Silverton},
+ Date-Added = {2008-06-29 16:15:59 +0200},
+ Date-Modified = {2008-06-29 16:17:10 +0200},
+ Howpublished = {Technical Report Version 11, IETF, RSer Pool Working Group},
+ Html = {http://www3.tools.ietf.org/html/draft-ietf-rserpool-arch-12},
+ Institution = {IETF, RSerPool Working Group},
+ Key = {SLB},
+ Month = mar,
+ Title = {{Architecture for Reliable Server Pooling}},
+ Type = {Internet-Draft},
+ Year = {2006}}
+
+ at inproceedings{Cristian88,
+ Abstract = {The author describes his system model and failure assumptions by precisely specifying the processor group membership problem. He then gives two protocols for solving this problem. The protocols provide all correct processors with constituent views of the processor group membership. They also guarantee bounded processor failure detection and join processing delays despite any number of performance failures that do not cause network partitioning. The first protocol provides very fast processor failure detection but can require a significant message traffic overhead, even when no failures occur. To reduce this overhead, the author derives the second protocol, which has a (provable) minimal message overhead in the absence of failures but provides a longer failure detection delay and is more complex. He concludes by comparing his approach with other known approaches},
+ Author = {Cristian, F.},
+ Booktitle = {Eighteenth International Symposium on Fault-Tolerant Computing. Digest of Papers. FTCS-18},
+ Date-Added = {2008-06-28 18:33:09 +0200},
+ Date-Modified = {2008-06-28 18:33:34 +0200},
+ Interesting = {high/med/low},
+ Pages = {206--211},
+ Title = {{Agreeing on Who is Present and Who is Absent in a Synchronous Distributed System}},
+ Url = {http://ieeexplore.ieee.org/iel2/210/275/00005321.pdf},
+ Year = {1988},
+ Bdsk-Url-1 = {http://ieeexplore.ieee.org/iel2/210/275/00005321.pdf}}
+
+ at article{70732,
+ Address = {New York, NY, USA},
+ Author = {M. Kaashoek and A. Tanenbaum and S. Hummel},
+ Date-Added = {2008-06-28 13:59:55 +0200},
+ Date-Modified = {2008-06-28 14:00:44 +0200},
+ Doi = {10.1145/70730.70732},
+ Issn = {0163-5980},
+ Journal = {SIGOPS Operating Systems Review},
+ Number = {4},
+ Pages = {5--19},
+ Publisher = {ACM},
+ Title = {{An Efficient Reliable Broadcast Protocol}},
+ Volume = {23},
+ Year = {1989},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/70730.70732}}
+
+ at article{503113,
+ Address = {New York, NY, USA},
+ Author = {G. Chockler and I. Keidar and R. Vitenberg},
+ Date-Added = {2008-06-28 12:40:29 +0200},
+ Date-Modified = {2008-06-28 12:41:03 +0200},
+ Doi = {10.1145/503112.503113},
+ Issn = {0360-0300},
+ Journal = {ACM Computing Surveys},
+ Number = {4},
+ Pages = {427--469},
+ Publisher = {ACM},
+ Title = {{Group Communication Specifications: A Comprehensive Study}},
+ Volume = {33},
+ Year = {2001},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/503112.503113}}
+
+ at inproceedings{moser94extended,
+ Author = {L. Moser and Y. Amir and P. Melliar-Smith and D. Agarwal},
+ Title = {{Extended Virtual Synchrony}},
+ Booktitle = {The 14th {IEEE} International Conference on Distributed Computing Systems ({ICDCS})},
+ Pages = {56--65},
+ Year = {1994},
+ Url = {citeseer.ist.psu.edu/moser94extended.html},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/moser94extended.html},
+ Date-Added = {2008-06-27 23:39:55 +0200},
+ Date-Modified = {2008-06-27 23:40:19 +0200}}
+
+ at article{361623,
+ Address = {New York, NY, USA},
+ Author = {D. L. Parnas},
+ Date-Added = {2008-06-27 18:15:42 +0200},
+ Date-Modified = {2008-06-27 18:16:09 +0200},
+ Doi = {10.1145/361598.361623},
+ Issn = {0001-0782},
+ Journal = {Communications of the ACM},
+ Number = {12},
+ Pages = {1053--1058},
+ Publisher = {ACM},
+ Title = {{On the Criteria to be Used in Decomposing Systems into Modules}},
+ Volume = {15},
+ Year = {1972},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/361598.361623}}
+
+ at misc{Jindal:2001dp,
+ Author = {Jindal, A. and Lim, S. and Radia, S. and Chang, W.-L. },
+ Title = {{Load Balancing for Replicated Services}},
+ Howpublished = {\url{http://www.freepatentsonline.com/6324580.html}},
+ Year = {2001},
+ Date-Added = {2008-06-20 13:20:18 +0200},
+ Date-Modified = {2008-06-20 13:32:49 +0200}}
+
+ at inproceedings{689520,
+ Author = {A. Downey},
+ Title = {{Using Queue Time Predictions for Processor Allocation}},
+ Booktitle = {IPPS '97: Proceedings of the Job Scheduling Strategies for Parallel Processing},
+ Pages = {35--57},
+ Publisher = {Springer-Verlag},
+ Address = {London, UK},
+ Year = {1997},
+ Isbn = {3-540-63574-2},
+ Date-Added = {2008-06-18 20:10:17 +0200},
+ Date-Modified = {2008-06-18 20:10:28 +0200}}
+
+ at inproceedings{689526,
+ Author = {W. Smith and I. Foster and V. Taylor},
+ Title = {{Predicting Application Run Times Using Historical Information}},
+ Booktitle = {IPPS/SPDP '98: Proceedings of the Workshop on Job Scheduling Strategies for Parallel Processing},
+ Pages = {122--142},
+ Publisher = {Springer-Verlag},
+ Address = {London, UK},
+ Year = {1998},
+ Isbn = {3-540-64825-9},
+ Date-Added = {2008-06-18 19:47:05 +0200},
+ Date-Modified = {2008-06-18 19:48:27 +0200}}
+
+ at inproceedings{Sadjadi:2008ad,
+ Author = {S. M. Sadjadi and S. Shimizu and J. Figueroa and R. Rangaswami and J. Delgado and H. Duran and X. J. Collazo-Mojica},
+ Title = {{A Modeling Approach for Estimating Execution Time of Long-Running Scientific Applications}},
+ Booktitle = {Proceedings of Fifth High-Performance Grid Computing Workshop in conjunction with IEEE International Parallel \& Distributed Processing Symposium},
+ Address = {Miami, USA},
+ Year = {2008},
+ Read = {Yes},
+ Date-Added = {2008-06-18 19:38:27 +0200},
+ Date-Modified = {2008-06-23 10:28:40 +0200}}
+
+ at incollection{59324,
+ Address = {Norwood, MA, USA},
+ Author = {J. H. Saltzer and D. P. Reed and D. D. Clark},
+ Booktitle = {Innovations in Internetworking},
+ Date-Added = {2008-06-17 17:32:00 +0200},
+ Date-Modified = {2008-06-17 17:32:15 +0200},
+ Isbn = {0-89006-337-0},
+ Pages = {195--206},
+ Publisher = {Artech House, Inc.},
+ Title = {{End-to-End Arguments in System Design}},
+ Year = {1988}}
+
+ at article{Arnold:2001:CCD,
+ Acknowledgement = {ack-nhfb},
+ Author = {D. C. Arnold and S. S. Vahdiyar and J. J. Dongarra},
+ Bibsource = {http://ejournals.wspc.com.sg/ppl/ppl.shtml},
+ Coden = {PPLTEE},
+ Date-Added = {2008-06-17 16:54:56 +0200},
+ Date-Modified = {2008-06-17 16:56:26 +0200},
+ Issn = {0129-6264},
+ Journal = {Parallel Processing Letters},
+ Number = {2--3},
+ Pages = {187--202},
+ Title = {{On the Convergence of Computational and Data Grids}},
+ Url = {http://ejournals.wspc.com.sg/ppl/11/sample/S012962640100052X.html},
+ Volume = {11},
+ Year = {2001},
+ Bdsk-Url-1 = {http://ejournals.wspc.com.sg/ppl/11/sample/S012962640100052X.html},
+ Bdsk-Url-2 = {http://www.netlib.org/utk/people/JackDongarra/PAPERS/convergence-data-grids.pdf}}
+
+ at inproceedings{873244,
+ Author = {G. Lanfermann and G. Allen and T. Radke and E. Seidel},
+ Title = {{Nomadic Migration: Fault Tolerance in a Disruptive Grid Environment}},
+ Booktitle = {CCGRID '02: Proceedings of the 2nd IEEE/ACM International Symposium on Cluster Computing and the Grid},
+ Pages = {280},
+ Publisher = {IEEE Computer Society},
+ Address = {Washington, DC, USA},
+ Year = {2002},
+ Isbn = {0-7695-1582-7},
+ Date-Added = {2008-06-16 23:11:59 +0200},
+ Date-Modified = {2008-06-16 23:12:23 +0200}}
+
+ at inproceedings{689541,
+ Author = {S. J. Chapin and D. Katramatos and J. F. Karpovich and A. S. Grimshaw},
+ Title = {{The Legion Resource Management System}},
+ Booktitle = {IPPS/SPDP '99/JSSPP '99: Proceedings of the Job Scheduling Strategies for Parallel Processing},
+ Pages = {162--178},
+ Publisher = {Springer-Verlag},
+ OPTAddress = {London, UK},
+ Year = {1999},
+ Isbn = {3-540-66676-1},
+ Date-Added = {2008-06-16 22:16:58 +0200},
+ Date-Modified = {2008-06-16 22:17:25 +0200}}
+
+ at misc{nagios,
+ Key = {nagios},
+ Title = {Nagios Homepage},
+ Howpublished = {\url{http://www.nagios.org/}},
+ Year = {2008},
+ Date-Added = {2008-06-16 17:38:29 +0200},
+ Date-Modified = {2008-06-16 17:38:56 +0200}}
+
+ at techreport{894137,
+ Address = {Berkeley, CA, USA},
+ Author = {D. Patterson and A. Brown and P. Broadwell and G. Candea and M. Chen and J. Cutler and P. Enriquez and A. Fox and E. Kiciman and M. Merzbacher and D. Oppenheimer and N. Sastry and W. Tetzlaff and J. Traupman and N. Treuhaft},
+ Date-Added = {2008-06-16 17:28:20 +0200},
+ Date-Modified = {2008-06-16 17:29:57 +0200},
+ Institution = {University of California at Berkeley},
+ Number = {UCB//CSD-02-1175},
+ Source = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Aucb%3AUCB%2F%2FCSD-02-1175},
+ Title = {{Recovery Oriented Computing (ROC): Motivation, Definition, Techniques, and Case Studies}},
+ Year = {2002}}
+
+ at misc{Begin:2008rq,
+ Author = {Marc-Elian B{\'e}gin},
+ Date-Added = {2008-06-16 09:47:54 +0200},
+ Date-Modified = {2008-06-16 09:49:17 +0200},
+ Howpublished = {\url{https://edms.cern.ch/file/925013/3/EGEE-Grid-Cloud.pdf}},
+ Title = {{Grids and Clouds -- Evolution or Revolution}},
+ Year = {2008}}
+
+ at misc{McCarthy:1961th,
+ Author = {J. McCarthy},
+ Title = {{MIT Centennial Speech. 1961}},
+ Howpublished = {{\em Cited in} Architects of the Information Society: Thirty-five Years of the Laboratory for Computer Science at MIT. S. L. Garfinkel (Ed), MIT Press, Cambridge MA},
+ Year = {1999},
+ Date-Added = {2008-06-16 09:23:20 +0200},
+ Date-Modified = {2008-06-16 09:38:51 +0200}}
+
+ at misc{Vogels:2008nx,
+ Author = {W. Vogels},
+ Title = {{A Head in the Cloud - The Power of Infrastructure as a Service}},
+ Howpublished = {Talk given at the Open Grid Forum 23},
+ Year = {2008},
+ Date-Added = {2008-06-16 08:15:19 +0200},
+ Date-Modified = {2008-06-16 10:29:36 +0200}}
+
+ at misc{reservoir,
+ Key = {Reservoir},
+ Title = {{RESERVOIR - Resources and Services Virtualization Without Barriers}},
+ Howpublished = {\url{http://www.reservoir-fp7.eu/}},
+ Year = {2008},
+ Date-Added = {2008-06-16 08:04:27 +0200},
+ Date-Modified = {2008-06-16 08:05:40 +0200}}
+
+ at misc{Battre:2008wc,
+ Author = {D. Battr{\'e} and O. Kao and K. Voss},
+ Date-Added = {2008-06-14 20:19:11 +0200},
+ Date-Modified = {2008-06-14 20:20:02 +0200},
+ Howpublished = {\url{http://www.assessgrid.eu/fileadmin/AssessGrid/usermounts/publications/papers/Implementing-WS-Agreement-Abstract.pdf}},
+ Title = {{Implementing WS-Agreement in a Globus Toolkit 4.0 Environment}},
+ Year = {2008}}
+
+ at misc{HPCGI,
+ Title = {{HPC Grid Interoperability Demonstration at SC07}},
+ Howpublished = {\url{http://www.ogf.org/rotate_headers/documents/HPCBP_Data_Sheet_final1.pdf}},
+ Year = {2007},
+ Date-Added = {2008-06-14 19:59:02 +0200},
+ Date-Modified = {2008-06-14 20:00:57 +0200}}
+
+ at misc{knowarc,
+ Date-Added = {2008-06-14 19:21:59 +0200},
+ Date-Modified = {2008-06-14 19:24:07 +0200},
+ Howpublished = {\url{http://www.knowarc.eu/}},
+ Key = {knowarc},
+ Title = {{KnowARC -- Grid-enabled Know-how Sharing Technology Based on ARC Services and Open Standards}},
+ Year = {2008}}
+
+ at article{Staten:2008zm,
+ Author = {J. Staten and S. Yates and F. Gillett and W. Saleh and R. Dines},
+ Title = {{Is Cloud Computing Ready for the Enterprise}},
+ Journal = {Forrester Research},
+ Year = {2008},
+ Date-Added = {2008-06-14 18:16:02 +0200},
+ Date-Modified = {2008-06-14 18:17:38 +0200}}
+
+ at article{Allen:2008la,
+ Author = {Allen, G. and Bogden, P. and Kosar, T. and Kulshrestha, A. and Namala, G. and Tummala, S. and Seidel, E.},
+ Title = {{Cyberinfrastructure for Coastal Hazard Prediction}},
+ Journal = {CTWatch Quarterly},
+ Volume = {4},
+ Number = {1},
+ Year = {2008},
+ Howpublished = {\url{http://www.ctwatch.org/quarterly/articles/2008/03/cyberinfrastructure-for-coastal-hazard-prediction/}},
+ Date-Added = {2008-06-14 17:30:08 +0200},
+ Date-Modified = {2008-06-14 17:32:44 +0200}}
+
+ at misc{chervenak99data,
+ Author = {A. Chervenak and I. Foster and C. Kesselman and C. Salisbury and S. Tuecke},
+ Title = {{The Data Grid: Towards an Architecture for the Distributed Management and Analysis of Large Scientific Datasets}},
+ Year = {1999},
+ Url = {citeseer.ist.psu.edu/chervenak99data.html},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/chervenak99data.html},
+ Text = {A. Chervenak, I. Foster, C. Kesselman, C. Salisbury, and S. Tuecke. The Data Grid: Towards an architecture for the distributed management and analysis of large scientific datasets. http://www.globus.org/, 1999. 132},
+ Date-Added = {2008-06-14 17:03:51 +0200},
+ Date-Modified = {2008-06-14 17:04:25 +0200}}
+
+ at inproceedings{10.1109/ICPADS.2005.155,
+ Address = {Los Alamitos, CA, USA},
+ Author = {M. Hovestadt},
+ Booktitle = {11th International Conference on Parallel and Distributed Systems (ICPADS'05)},
+ Date-Added = {2008-06-10 19:20:11 +0200},
+ Date-Modified = {2008-06-10 19:20:50 +0200},
+ Doi = {10.1109/ICPADS.2005.155},
+ Issn = {1521-9097},
+ Pages = {458--462},
+ Publisher = {IEEE Computer Society},
+ Title = {{Fault Tolerance Mechanisms for SLA-aware Resource Management}},
+ Volume = {2},
+ Year = {2005},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/ICPADS.2005.155}}
+
+ at inproceedings{1251461,
+ Author = {D. Oppenheimer and A. Ganapathi and D. Patterson},
+ Title = {{Why do Internet Services Fail, and What can be Done About it?}},
+ Booktitle = {USITS'03: Proceedings of the 4th conference on USENIX Symposium on Internet Technologies and Systems},
+ Location = {Seattle, WA},
+ Pages = {1--1},
+ Publisher = {USENIX Association},
+ Address = {Berkeley, CA, USA},
+ Year = {2003},
+ Date-Added = {2008-06-10 16:57:06 +0200},
+ Date-Modified = {2008-06-10 16:57:51 +0200}}
+
+ at inproceedings{DBLP:conf/grid/KhaliliHOSC06,
+ Author = {Omid Khalili and Jiahua He and Catherine Olschanowsky and Allan Snavely and Henri Casanova},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {GRID},
+ Crossref = {DBLP:conf/grid/2006},
+ Date-Added = {2008-06-08 13:14:10 +0200},
+ Date-Modified = {2008-06-08 13:14:19 +0200},
+ Ee = {http://dx.doi.org/10.1109/ICGRID.2006.311028},
+ Pages = {293--300},
+ Title = {{Measuring the Performance and Reliability of Production Computational Grids}},
+ Year = {2006}}
+
+ at proceedings{DBLP:conf/grid/2006,
+ Title = {7th IEEE/ACM International Conference on Grid Computing (GRID 2006), September 28-29, 2006, Barcelona, Spain, Proceedings},
+ Booktitle = {GRID},
+ Publisher = {IEEE},
+ Year = {2006},
+ Isbn = {1-4244-0344-8},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-06-08 13:14:03 +0200},
+ Date-Modified = {2008-06-08 13:14:03 +0200}}
+
+ at inproceedings{10.1109/E-SCIENCE.2006.93,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Hui Li and David Groep and Lex Wolters and Jeff Templon},
+ Booktitle = {Second IEEE International Conference on e-Science and Grid Computing (e-Science'06)},
+ Date-Added = {2008-06-08 13:03:16 +0200},
+ Date-Modified = {2008-06-08 13:03:25 +0200},
+ Doi = {10.1109/E-SCIENCE.2006.93},
+ Isbn = {0-7695-2734-5},
+ Pages = {27},
+ Publisher = {IEEE Computer Society},
+ Title = {{Job Failure Analysis and Its Implications in a Large-Scale Production Grid}},
+ Year = {2006},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/E-SCIENCE.2006.93}}
+
+ at inproceedings{Keller-Reinefeld-1998,
+ Address = {Orlando, Florida},
+ Author = {Axel Keller and Alexander Reinefeld},
+ Booktitle = {Proc. Heterogeneous Computing Workshop HCW'98 at IPPS},
+ Date-Added = {2008-06-02 15:23:38 +0200},
+ Date-Modified = {2008-06-02 15:23:38 +0200},
+ Pages = {44--56},
+ Publisher = {IEEE Comp. Society Press},
+ Title = {CCS Resource Management in Networked HPC Systems },
+ Year = 1998}
+
+ at misc{crown,
+ Date-Added = {2008-05-30 11:54:39 +0200},
+ Date-Modified = {2008-05-30 12:16:58 +0200},
+ Howpublished = {\url{http://www.crown.org.cn/en/}},
+ Key = {crown},
+ Title = {{China Research and Development Environment over Wide-Area Network (CROWN)}},
+ Year = {2004}}
+
+ at misc{Miura:2006fp,
+ Author = {Kenichi Miura},
+ Date-Added = {2008-05-30 11:52:01 +0200},
+ Date-Modified = {2008-05-30 11:52:54 +0200},
+ Howpublished = {\url{http://www.nii.ac.jp/pi/n3/3_67.pdf}},
+ Title = {{Overview of Japanese Science Grid Project NAREGI}},
+ Year = {2006}}
+
+ at misc{creambes,
+ Date-Added = {2008-05-30 10:32:33 +0200},
+ Date-Modified = {2008-06-08 18:38:23 +0200},
+ Howpublished = {\url{http://grid.pd.infn.it/omii/cream-bes}},
+ Key = {CREAM-BES},
+ Title = {{CREAM-BES}},
+ Year = {2008}}
+
+ at misc{savva:2007kb,
+ Date-Added = {2008-05-29 22:16:01 +0200},
+ Date-Modified = {2008-05-29 22:16:01 +0200},
+ Editor = {A. Savva},
+ Howpublished = {Open Grid Forum Document GFD.115},
+ Note = {Open Grid Forum},
+ Title = {{JSDL SPMD Application Extension, Version 1.0}},
+ Year = 2007}
+
+ at misc{Humphrey:2007qf,
+ Author = {M. Humphrey and C. Smith and M. Theimer and G. Wasson},
+ Date-Added = {2008-05-29 22:14:50 +0200},
+ Date-Modified = {2008-05-29 22:16:58 +0200},
+ Howpublished = {Open Grid Forum Document GFD.111},
+ Note = {Open Grid Forum},
+ Title = {{JSDL HPC Profile Application Extension, Version 1.0}},
+ Year = 2007}
+
+ at misc{Dillaway:2007bv,
+ Author = {B. Dillaway and M. Humphrey and C. Smith and M. Theimer and G. Wasson},
+ Date-Added = {2008-05-29 22:13:05 +0200},
+ Date-Modified = {2008-05-29 22:13:53 +0200},
+ Howpublished = {Open Grid Forum Document GFD.114},
+ Note = {Open Grid Forum},
+ Title = {{HPC Basic Profile, Version 1.0}},
+ Year = 2007}
+
+ at misc{wsn,
+ Date-Added = {2008-05-29 20:14:38 +0200},
+ Date-Modified = {2008-06-19 16:10:48 +0200},
+ Howpublished = {\url{http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=wsn}},
+ Key = {Web Service Notification},
+ Title = {{Web Service Notification (WSN) TC}},
+ Year = {2006}}
+
+ at article{Riedel:2008sh,
+ Author = {M. Riedel and E. Laure and T. Soddemann and others},
+ Date-Added = {2008-05-29 19:56:03 +0200},
+ Date-Modified = {2008-05-29 20:11:11 +0200},
+ Howpublished = {\url{http://www.ogf.org/OGF_Special_Issue/cpeginpaper_riedel.pdf}},
+ Journal = {Concurrency and Computation: Practice and Experience (OGF Special Issue)},
+ Key = {Riedel},
+ Title = {{Interoperation of World-Wide Production e-Science Infrastructures}},
+ Year = {2008}}
+
+ at techreport{Liu:07:WSD,
+ Author = {C. Liu and D. Booth},
+ Bibsource = {http://webcapita.com/w3c-bibliography/2007/REC-wsdl20-primer-20070626},
+ Date-Added = {2008-05-29 18:19:13 +0200},
+ Date-Modified = {2008-06-28 11:51:53 +0200},
+ Institution = {W3C},
+ Month = jun,
+ Note = {http://www.w3.org/TR/2007/REC-wsdl20-primer-20070626},
+ Title = {{Web Services Description Language ({WSDL}) Version 2.0 Part 0: Primer}},
+ Type = {{W3C} Recommendation},
+ Year = {2007}}
+
+ at techreport{Fallside:01:XSP,
+ Author = {David C. Fallside},
+ Bibsource = {http://webcapita.com/w3c-bibliography/2001/REC-xmlschema-0-20010502/},
+ Date-Added = {2008-05-29 18:14:05 +0200},
+ Date-Modified = {2008-05-29 18:14:05 +0200},
+ Institution = {W3C},
+ Month = may,
+ Note = {http://www.w3.org/TR/2001/REC-xmlschema-0-20010502/},
+ Title = {{XML} Schema Part 0: Primer},
+ Type = {first Edition of a Recommendation},
+ Year = {2001}}
+
+ at misc{astrogrid-uc,
+ Date-Added = {2008-05-29 17:22:31 +0200},
+ Date-Modified = {2008-05-29 17:37:52 +0200},
+ Howpublished = {\url{http://www.gac-grid.de/project-documents/UseCases.html}},
+ Key = {AstroGrid-D},
+ Keywords = {blcr},
+ Title = {{AstroGrid-D -- Use Cases}},
+ Year = {2008}}
+
+ at misc{Foster:2004gs,
+ Author = {I. Foster and D. Gannon and H. Kishimoto and J. Von Reich},
+ Date-Added = {2008-05-29 17:16:29 +0200},
+ Date-Modified = {2008-05-29 17:21:04 +0200},
+ Howpublished = {Open Grid Forum Document GFD.29},
+ Note = {Open Grid Forum},
+ Title = {{Open Grid Service Architecture Use Cases}},
+ Year = 2004}
+
+ at misc{Berry:2007vf,
+ Author = {D. Berry and A. Luniewski and M. Antonioletti},
+ Date-Added = {2008-05-29 17:16:27 +0200},
+ Date-Modified = {2008-05-29 17:16:27 +0200},
+ Howpublished = {Open Grid Forum Document GFD.121},
+ Note = {Open Grid Forum},
+ Title = {{OGSA Data Architecture}},
+ Year = 2007}
+
+ at misc{Sandholm:2003zh,
+ Author = {T. Sandholm and J. Gawor},
+ Date-Added = {2008-05-29 15:10:37 +0200},
+ Date-Modified = {2008-05-29 15:11:12 +0200},
+ Howpublished = {\url{http://www.globus.org/toolkit/3.0/ogsa/docs/gt3_core.pdf}},
+ Title = {{Globus Toolkit 3 Core -- A Grid Service Container}},
+ Year = {2003}}
+
+ at misc{gridsam,
+ Date-Added = {2008-05-29 13:55:00 +0200},
+ Date-Modified = {2008-05-29 15:13:51 +0200},
+ Howpublished = {\url{http://gridsam.sourceforge.net/2.0.1/index.html}},
+ Title = {{GridSAM -- Grid Job Submission and Monitoring Web Service}},
+ Year = {2007}}
+
+ at misc{Foster:2005la,
+ Author = {I. Foster},
+ Date-Added = {2008-05-29 12:27:17 +0200},
+ Date-Modified = {2008-05-29 12:27:48 +0200},
+ Howpublished = {\url{http://sorma.fzi.de/images/9/90/Fos05c.pdf}},
+ Title = {How Do I Model State? Let Me Count the Ways},
+ Year = {2005}}
+
+ at article{357392,
+ Address = {New York, NY, USA},
+ Author = {A. Birrell and B. Nelson},
+ Date-Added = {2008-05-29 11:12:00 +0200},
+ Date-Modified = {2008-07-20 12:06:59 +0200},
+ Doi = {http://doi.acm.org/10.1145/2080.357392},
+ Issn = {0734-2071},
+ Journal = {ACM Transactions on Computer Systems},
+ Number = {1},
+ Pages = {39--59},
+ Publisher = {ACM},
+ Title = {{Implementing Remote Procedure Calls}},
+ Volume = {2},
+ Year = {1984},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/2080.357392}}
+
+ at misc{Harbulot:2006rc,
+ Author = {Bruno Harbulot},
+ Date-Added = {2008-05-29 10:52:23 +0200},
+ Date-Modified = {2008-05-29 10:53:10 +0200},
+ Howpublished = {\url{http://blog.distributedmatter.net/post/2006/11/22/Experiences-with-WSRF}},
+ Title = {{Experiences with WSRF}},
+ Year = {2006}}
+
+ at misc{wsi,
+ Date-Added = {2008-05-29 10:33:01 +0200},
+ Date-Modified = {2008-06-16 09:39:36 +0200},
+ Howpublished = {\url{http://www.ws-i.org/Profiles/BasicProfile-1.1.html}},
+ Key = {WS-I},
+ Title = {{WS-I Basic Profile Version 1.1}},
+ Year = {2006}}
+
+ at misc{sandesha,
+ Date-Added = {2008-05-29 09:23:19 +0200},
+ Date-Modified = {2008-06-19 09:10:41 +0200},
+ Howpublished = {\url{http://ws.apache.org/sandesha/}},
+ Key = {Apache sandesha},
+ Title = {{Apache Sandesha}},
+ Year = {2008}}
+
+ at misc{Davis:2007kx,
+ Author = {D. Davis and A. Karmarkar and G. Pilz and S. Winkler and U. Yalcinalp},
+ Date-Added = {2008-05-29 09:02:24 +0200},
+ Date-Modified = {2008-05-29 09:04:07 +0200},
+ Howpublished = {\url{http://docs.oasis-open.org/ws-rx/wsrm/200702/wsrm-1.1-spec-os-01.pdf}},
+ Title = {{Web Services Reliable Messaging (WS-ReliableMessaging) Version 1.1}},
+ Year = {2007}}
+
+ at misc{oscar:2008rc,
+ Date-Added = {2008-05-28 23:08:10 +0200},
+ Date-Modified = {2008-06-27 23:42:38 +0200},
+ Howpublished = {\url{http://oscar.openclustergroup.org/}},
+ Key = {OSCAR},
+ Title = {{OSCAR -- Open Source Cluster Application Resource}},
+ Year = {2008}}
+
+ at inproceedings{Ananthakrishnan:2008lq,
+ Author = {R. Ananthakrishnan and M. D'Arcy and T. Howe},
+ Booktitle = {{Talk at Open Source Grid \& Cluster Conference}},
+ Date-Added = {2008-05-28 23:02:29 +0200},
+ Date-Modified = {2008-05-29 14:41:55 +0200},
+ Howpublished = {\url{http://www.opensourcegridcluster.org/documents/gw-javawscore-features.ppt}},
+ Title = {{Globus WS Core and Tools}},
+ Year = {2008}}
+
+ at article{10.1109/HPCS.2005.28,
+ Address = {Los Alamitos, CA, USA},
+ Author = {K. Limaye and B. Leangsuksun and V. Munganuru and Z. Greenwood and S. Scott and R. Libby and K. Chanchio},
+ Date-Added = {2008-05-28 22:54:38 +0200},
+ Date-Modified = {2008-06-27 23:45:58 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/HPCS.2005.28},
+ Issn = {1550-5243},
+ Journal = {19th International Symposium on High Performance Computing Systems and Applications},
+ Pages = {333--339},
+ Publisher = {IEEE Computer Society},
+ Title = {{Grid-Aware HA-OSCAR}},
+ Volume = {00},
+ Year = {2005},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/HPCS.2005.28}}
+
+ at article{DBLP:journals/corr/cs-AR-9912010,
+ Author = {B. Devlin and J. Gray and B. Laing and G. Spix},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-05-28 22:44:14 +0200},
+ Date-Modified = {2008-05-28 22:44:40 +0200},
+ Ee = {http://arxiv.org/abs/cs.AR/9912010},
+ Journal = {CoRR},
+ Title = {{Scalability Terminology: Farms, Clones, Partitions, Packs, RACS and RAPS}},
+ Volume = {cs.AR/9912010},
+ Year = {1999}}
+
+ at inproceedings{PhiWalZie0509,
+ Address = {Poznan, Poland},
+ Author = {Wieder, P. and W{\"a}ldrich, O. and Ziegler, W.},
+ Booktitle = {Proceedings of the 6th International Conference, Parallel Processing and Applied Mathematics, PPAM 2005},
+ Date-Added = {2008-05-28 16:20:40 +0200},
+ Date-Modified = {2008-06-16 10:33:11 +0200},
+ Keywords = {WP6},
+ Month = {September},
+ Note = {Also published as CoreGRID Technical Report TR0010},
+ Pages = {782--791},
+ Publisher = {Springer},
+ Series = {LNCS},
+ Title = {A Meta-Scheduling Service for Co-allocating Arbitrary Types of Resources},
+ Volume = {3911},
+ Year = {2005}}
+
+ at article{10.1109/ICPADS.2007.4447834,
+ Address = {Los Alamitos, CA, USA},
+ Author = {M. S. Memon and A. S. Memon and M. Riedel and B. Schuller and D. Mallmann and B. Tweddell and A. Streit and S. van den Berghe and D. Snelling and V. Li and M. Marzolla and P. Andreetto},
+ Date-Added = {2008-05-28 13:28:45 +0200},
+ Date-Modified = {2008-06-16 10:32:32 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/ICPADS.2007.4447834},
+ Isbn = {978-1-4244-1889-3},
+ Journal = {13th International Conference on Parallel and Distributed Systems},
+ Pages = {1--6},
+ Publisher = {IEEE Computer Society},
+ Title = {{Enhanced Resource Management Capabilities Using Standardized Job Management and Data Access Interfaces within UNICORE Grids}},
+ Volume = {2},
+ Year = {2007},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/ICPADS.2007.4447834}}
+
+ at misc{dcache,
+ Date-Added = {2008-05-27 23:09:17 +0200},
+ Date-Modified = {2008-06-18 22:17:52 +0200},
+ Howpublished = {\url{http://www.dcache.org/}},
+ Key = {dcache},
+ Note = {05/2008},
+ Title = {{dCache Homepage}},
+ Year = {2008}}
+
+ at inproceedings{foster99distributed,
+ Author = {I. Foster and C. Kesselman and C. Lee and R. Lindell and K. Nahrstedt and A. Roy},
+ Booktitle = {Proceedings of the International Workshop on Quality of Service},
+ Date-Added = {2008-05-27 17:19:07 +0200},
+ Date-Modified = {2008-05-27 17:19:19 +0200},
+ Title = {{A Distributed Resource Management Architecture that Supports Advance Reservations and Co-Allocation}},
+ Url = {http://citeseer.ist.psu.edu/foster99distributed.html},
+ Year = {1999},
+ Bdsk-Url-1 = {http://citeseer.ist.psu.edu/foster99distributed.html}}
+
+ at misc{egee-glite,
+ Date-Added = {2008-05-27 14:28:35 +0200},
+ Date-Modified = {2008-05-27 14:32:57 +0200},
+ Howpublished = {\url{https://edms.cern.ch/file/476451/1.0/architecture.pdf}},
+ Key = {egee},
+ Title = {{EGEE Middleware Architecture}},
+ Year = {2004}}
+
+ at misc{torque:2008bv,
+ Date-Added = {2008-05-27 12:00:50 +0200},
+ Date-Modified = {2008-05-27 12:01:38 +0200},
+ Howpublished = {\url{http://www.clusterresources.com/pages/products/torque-resource-manager.php}},
+ Key = {torque},
+ Title = {Torque Resource Manager},
+ Year = {2008}}
+
+ at misc{XMLSIG,
+ Author = {W3C},
+ Date-Added = {2008-05-27 11:41:06 +0200},
+ Date-Modified = {2008-05-27 11:41:06 +0200},
+ Howpublished = {URL: \url{http://www.w3.org/TR/xmldsig-core/}},
+ Title = {{XML-Signature Syntax and Processing}},
+ Urldate = {16.05.2005},
+ Year = {2002}}
+
+ at misc{XMLENC,
+ Author = {W3C},
+ Date-Added = {2008-05-27 11:40:35 +0200},
+ Date-Modified = {2008-05-27 11:40:35 +0200},
+ Howpublished = {URL: \url{http://www.w3.org/TR/xmlenc-core/}},
+ Title = {{XML Encryption Syntax and Processing}},
+ Urldate = {16.05.2005},
+ Year = {2002}}
+
+ at misc{SAML,
+ Author = {OASIS},
+ Date-Added = {2008-05-27 11:39:24 +0200},
+ Date-Modified = {2008-05-27 11:40:03 +0200},
+ Howpublished = {URL: \url{http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=security#samlv20}},
+ Title = {{SAML V2.0 Standard}},
+ Year = {2005}}
+
+ at misc{soap,
+ Author = {W3C Consortium},
+ Date-Added = {2008-05-27 11:08:46 +0200},
+ Date-Modified = {2008-05-27 11:09:25 +0200},
+ Howpublished = {\url{http://www.w3.org/TR/soap12-part1/}},
+ Title = {{SOAP Version 1.2}}}
+
+ at misc{WSSECURE,
+ Author = {OASIS},
+ Date-Added = {2008-05-27 10:58:23 +0200},
+ Date-Modified = {2008-05-27 11:00:15 +0200},
+ Howpublished = {URL: \url{http://www.oasis-open.org/committees/download.php/16790/wss-v1.1-spec-os-SOAPMessageSecurity.pdf}},
+ Title = {{Web Services Security: SOAP Message Security 1.1}},
+ Urldate = {16.05.2008},
+ Year = {2006}}
+
+ at article{10.1109/CCGRID.2007.53,
+ Address = {Los Alamitos, CA, USA},
+ Author = {M. Morgan and A. Grimshaw},
+ Date-Added = {2008-05-27 10:10:59 +0200},
+ Date-Modified = {2008-05-27 10:15:32 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/CCGRID.2007.53},
+ Isbn = {0-7695-2833-3},
+ Journal = {Proceedings of the Seventh IEEE International Symposium on Cluster Computing and the Grid},
+ Pages = {611--618},
+ Publisher = {IEEE Computer Society},
+ Title = {{Genesis II - Standards Based Grid Computing}},
+ Volume = {00},
+ Year = {2007},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/CCGRID.2007.53}}
+
+ at article{Grimshaw:2008mz,
+ Author = {A. Grimshaw and M. Morgan and K. Sarnowska},
+ Date-Added = {2008-05-27 09:10:36 +0200},
+ Date-Modified = {2008-05-27 09:11:56 +0200},
+ Howpublished = {\url{http://www.ogf.org/OGF_Special_Issue/concP&E.v3.pdf}},
+ Journal = {Concurrency and Computation: Practice and Experience (OGF Special Issue)},
+ Title = {{WS-Naming: Location Migration, Replication, and Failure Transparency Support for Web Services}},
+ Year = {2008}}
+
+ at article{10.1109/CAHPC.2005.33,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Raphael Y. De Camargo and Fabio Kon and Alfredo Goldman},
+ Date-Added = {2008-05-26 23:27:44 +0200},
+ Date-Modified = {2008-05-26 23:27:44 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/CAHPC.2005.33},
+ Issn = {1550-6533},
+ Journal = {sbac-pad},
+ Pages = {226--234},
+ Publisher = {IEEE Computer Society},
+ Title = {Portable checkpointing and communication for BSP applications on dynamic heterogeneous Grid environments},
+ Volume = {0},
+ Year = {2005},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/CAHPC.2005.33}}
+
+ at inproceedings{plank94libckpt,
+ Address = {New Orleans, Louisiana/U.S.A.},
+ Author = {James S. Plank and Micah Beck and Gerry Kingsley and Kai Li},
+ Booktitle = {Proceedings of USENIX Winter 1995 Technical Conference},
+ Date-Added = {2008-05-26 22:12:52 +0200},
+ Date-Modified = {2008-05-26 22:12:52 +0200},
+ Month = jan,
+ Pages = {213--224},
+ Title = {{Libckpt: Transparent Checkpointing under Unix}},
+ Url = {http://citeseer.ist.psu.edu/plank95libckpt.html},
+ Year = {1995},
+ Bdsk-Url-1 = {http://citeseer.ist.psu.edu/plank95libckpt.html}}
+
+ at techreport{litzkow-tr1346,
+ Author = {M. Litzkow and T. Tannenbaum and J. Basney and M. Livny},
+ Date-Added = {2008-05-26 22:11:01 +0200},
+ Date-Modified = {2008-06-28 11:51:00 +0200},
+ Institution = {University of Wisconsin - Madison Computer Sciences Department},
+ Month = {April},
+ Number = {UW-CS-TR-1346},
+ Title = {{Checkpoint and Migration of {UNIX} Processes in the {Condor} Distributed Processing System}},
+ Year = {1997}}
+
+ at inproceedings{las98hpdc,
+ Address = {Chicago, IL},
+ Author = {P. Stelling and I. Foster and C. Kesselman and C. Lee and G. von Laszewski},
+ Booktitle = {Proceedings of the 7th IEEE International Symposium on High Performance Distributed Computing},
+ Comment = {vonLaszewski-final.bib},
+ Date-Added = {2008-05-26 19:50:44 +0200},
+ Date-Modified = {2008-05-26 19:51:21 +0200},
+ Month = {28-31~} # JUL,
+ Pages = {268--278},
+ Title = {{A Fault Detection Service for Wide Area Distributed Computations}},
+ Url = {http://www.globus.org/research/papers/vonLaszewski--hbm-hpdc.pdf},
+ Year = {1998},
+ Bdsk-Url-1 = {http://www.globus.org/research/papers/vonLaszewski--hbm-hpdc.pdf}}
+
+ at article{DBLP:journals/grid/HwangK03,
+ Author = {S. Hwang and C. Kesselman},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-05-26 18:18:37 +0200},
+ Date-Modified = {2008-05-26 18:19:06 +0200},
+ Ee = {http://www.springerlink.com/index/10.1023/B:GRID.0000035187.54694.75},
+ Journal = {Journal of Grid Computing},
+ Number = {3},
+ Pages = {251--272},
+ Title = {A Flexible Framework for Fault Tolerance in the Grid},
+ Volume = {1},
+ Year = {2003}}
+
+ at article{10.1109/HPDC.2006.1652141,
+ Address = {Los Alamitos, CA, USA},
+ Author = {X. Zhang and F. Junqueira and M. Hiltunen and K. Marzullo and R. Schlichting},
+ Date-Added = {2008-05-26 16:36:23 +0200},
+ Date-Modified = {2008-07-20 14:20:17 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/HPDC.2006.1652141},
+ Isbn = {1-4244-0307-3},
+ Journal = {15th IEEE International Symposium on High Performance Distributed Computing},
+ Pages = {105--116},
+ Publisher = {IEEE Computer Society},
+ Title = {{Replicating Nondeterministic Services on Grid Environments}},
+ Volume = {0},
+ Year = {2006},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/HPDC.2006.1652141}}
+
+ at article{Jha:2008by,
+ Author = {S. Jha and A. Merzky and G. Fox},
+ Date-Added = {2008-05-26 14:50:25 +0200},
+ Date-Modified = {2008-05-26 14:53:17 +0200},
+ Howpublished = {\url{http://www.ogf.org/OGF_Special_Issue/cloud-grid-saga.pdf}},
+ Journal = {Concurrency and Computation: Practice and Experience (OGF Special Issue)},
+ Title = {{Using Clouds to Provide Grids Higher-Levels of Abstraction and Explicit Support for Usage Modes}},
+ Year = {2008}}
+
+ at inproceedings{DBLP:conf/europar/KolaKL05,
+ Author = {G. Kola and T. Kosar and M. Livny},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Crossref = {DBLP:conf/europar/2005},
+ Date-Added = {2008-05-26 12:39:06 +0200},
+ Date-Modified = {2008-05-26 12:39:42 +0200},
+ Ee = {http://dx.doi.org/10.1007/11549468_51},
+ Pages = {442--453},
+ Title = {{Faults in Large Distributed Systems and What We Can Do About Them}},
+ Year = {2005}}
+
+ at proceedings{DBLP:conf/europar/2005,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Date-Added = {2008-05-26 12:38:59 +0200},
+ Date-Modified = {2008-05-26 12:38:59 +0200},
+ Editor = {Jos{\'e} C. Cunha and Pedro D. Medeiros},
+ Isbn = {3-540-28700-0},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Euro-Par 2005, Parallel Processing, 11th International Euro-Par Conference, Lisbon, Portugal, August 30 - September 2, 2005, Proceedings},
+ Volume = {3648},
+ Year = {2005}}
+
+ at misc{Morgan:2006eu,
+ Author = {M. Morgan},
+ Date-Added = {2008-05-26 11:34:36 +0200},
+ Date-Modified = {2008-05-28 13:37:05 +0200},
+ Howpublished = {Open Grid Forum Document GFD.87},
+ Note = {Open Grid Forum},
+ Title = {{ByteIO Specification 1.0}},
+ Year = 2006}
+
+ at misc{Tierney:2002zm,
+ Author = {B. Tierney and R. Aydt and D. Gunter and W. Smith and M. Swany and V. Taylor and R. Wolski},
+ Date-Added = {2008-05-26 11:34:31 +0200},
+ Date-Modified = {2008-05-26 11:35:30 +0200},
+ Howpublished = {Open Grid Forum Document GFD.7},
+ Note = {Open Grid Forum},
+ Title = {{A Grid Monitoring Architecture}},
+ Year = 2002}
+
+ at article{10.1109/TC.2002.1004595,
+ Address = {Los Alamitos, CA, USA},
+ Author = {W. Chen and S. Toueg and M.K. Aguilera},
+ Date-Added = {2008-05-26 11:18:39 +0200},
+ Date-Modified = {2008-05-26 11:18:47 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/TC.2002.1004595},
+ Issn = {0018-9340},
+ Journal = {IEEE Transactions on Computers},
+ Number = {5},
+ Pages = {561--580},
+ Publisher = {IEEE Computer Society},
+ Title = {{On the Quality of Service of Failure Detectors}},
+ Volume = {51},
+ Year = {2002},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/TC.2002.1004595}}
+
+ at inproceedings{384010,
+ Address = {New York, NY, USA},
+ Author = {I. Gupta and T. Chandra and G. Goldszmidt},
+ Booktitle = {PODC '01: Proceedings of the twentieth annual ACM symposium on Principles of distributed computing},
+ Date-Added = {2008-05-26 09:39:25 +0200},
+ Date-Modified = {2008-05-26 09:40:01 +0200},
+ Doi = {http://doi.acm.org/10.1145/383962.384010},
+ Isbn = {1-58113-383-9},
+ Location = {Newport, Rhode Island, United States},
+ Pages = {170--179},
+ Publisher = {ACM},
+ Title = {{On Scalable and Efficient Distributed Failure Detectors}},
+ Year = {2001},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/383962.384010}}
+
+ at article{Dabrowski:2008ad,
+ Author = {C. Dabrowski},
+ Date-Added = {2008-05-26 09:36:33 +0200},
+ Date-Modified = {2008-05-26 09:39:20 +0200},
+ Howpublished = {\url{http://www.ogf.org/OGF_Special_Issue/GridReliabilityDabrowski.pdf}},
+ Journal = {Concurrency and Computation: Practice and Experience (OGF Special Issue)},
+ Title = {Reliability in Grid Computing Systems},
+ Year = {2008}}
+
+ at article{367728,
+ Address = {New York, NY, USA},
+ Author = {D. Miloji\v{c}i\'{c} and F. Douglis and Y. Paindaveine and R. Wheeler and S. Zhou},
+ Date-Added = {2008-05-26 09:12:03 +0200},
+ Date-Modified = {2008-05-26 09:12:34 +0200},
+ Doi = {http://doi.acm.org/10.1145/367701.367728},
+ Issn = {0360-0300},
+ Journal = {ACM Comput. Surv.},
+ Number = {3},
+ Pages = {241--299},
+ Publisher = {ACM},
+ Title = {{Process Migration}},
+ Volume = {32},
+ Year = {2000},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/367701.367728}}
+
+ at misc{ganglia,
+ Date-Added = {2008-05-25 23:46:52 +0200},
+ Date-Modified = {2008-05-25 23:47:26 +0200},
+ Howpublished = {\url{http://ganglia.info/}},
+ Key = {ganglia},
+ Title = {Ganglia Homepage},
+ Year = {2008}}
+
+ at article{10.1109/IPDPS.2006.1639275,
+ Address = {Los Alamitos, CA, USA},
+ Author = {E. Benson and G. Wasson and M. Humphrey},
+ Date-Added = {2008-05-25 23:07:19 +0200},
+ Date-Modified = {2008-05-25 23:07:49 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/IPDPS.2006.1639275},
+ Isbn = {1-4244-0054-6},
+ Journal = {ipdps},
+ Pages = {18},
+ Publisher = {IEEE Computer Society},
+ Title = {{Evaluation of UDDI as a Provider of Resource Discovery Services for OGSA-Based Grids}},
+ Volume = {0},
+ Year = {2006},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/IPDPS.2006.1639275}}
+
+ at inproceedings{Bresnahan:2007hf,
+ Author = {J. Bresnahan and M. Link and G. Khanna and Z. Imani and R. Kettimuthu and I. Foster},
+ Booktitle = {Proceedings of the First International Conference on Networks for Grid Applications },
+ Date-Added = {2008-05-25 22:14:13 +0200},
+ Date-Modified = {2008-05-29 14:41:11 +0200},
+ Title = {{Globus GridFTP: What's New in 2007}},
+ Year = {2007}}
+
+ at misc{RFC0959,
+ Acknowledgement = {Nelson H. F. Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, FAX: +1 801 585 1640, +1 801 581 4148, e-mail: \path|beebe at math.utah.edu|, \path|beebe at acm.org|, \path|beebe at ieee.org| (Internet), URL: \path|http://www.math.utah.edu/~beebe/|},
+ Author = {J. Postel and J. K. Reynolds},
+ Bibdate = {Sat Jan 10 08:59:55 MST 1998},
+ Date-Added = {2008-05-25 22:11:01 +0200},
+ Date-Modified = {2008-05-26 16:38:05 +0200},
+ Day = {1},
+ Format = {TXT=151249 bytes},
+ Month = oct,
+ Note = {Status: STANDARD.},
+ Online = {yes},
+ Status = {STANDARD},
+ Title = {{RFC 959}: File Transfer Protocol},
+ Updatedby = {Updated by RFC2228 \cite{RFC2228}.},
+ Url = {ftp://ftp.internic.net/rfc/rfc2228.txt, ftp://ftp.internic.net/rfc/rfc765.txt, ftp://ftp.internic.net/rfc/rfc959.txt, ftp://ftp.math.utah.edu/pub/rfc/rfc2228.txt, ftp://ftp.math.utah.edu/pub/rfc/rfc765.txt, ftp://ftp.math.utah.edu/pub/rfc/rfc959.txt},
+ Year = {1985},
+ Bdsk-Url-1 = {ftp://ftp.internic.net/rfc/rfc2228.txt,%20%20%20%20ftp://ftp.internic.net/rfc/rfc765.txt,%20%20%20%20ftp://ftp.internic.net/rfc/rfc959.txt,%20%20%20%20ftp://ftp.math.utah.edu/pub/rfc/rfc2228.txt,%20%20%20%20ftp://ftp.math.utah.edu/pub/rfc/rfc765.txt,%20%20%20%20ftp://ftp.math.utah.edu/pub/rfc/rfc959.txt}}
+
+ at misc{Allcock:2002mw,
+ Author = {W. Allcock},
+ Date-Added = {2008-05-25 22:06:26 +0200},
+ Date-Modified = {2008-05-25 22:07:07 +0200},
+ Howpublished = {Open Grid Forum Document GFD.22},
+ Note = {Open Grid Forum},
+ Title = {{GridFTP: Protocol Extensions to FTP for the Grid}},
+ Year = 2002}
+
+ at misc{Mandrichenko:2005um,
+ Author = {I. Mandrichenko and W. Allcock and T. Perelmutov},
+ Date-Added = {2008-05-25 21:57:15 +0200},
+ Date-Modified = {2008-05-25 22:08:32 +0200},
+ Howpublished = {Open Grid Forum Document GFD.47},
+ Note = {Open Grid Forum},
+ Title = {{GridFTP v2 Protocol Description}},
+ Year = 2005}
+
+ at book{Sotomayor:2005gl,
+ Author = {B. Sotomayor and L. Childers},
+ Date-Added = {2008-05-25 20:26:49 +0200},
+ Date-Modified = {2008-06-16 10:36:42 +0200},
+ Publisher = {Morgan Kaufmann Publishers},
+ Title = {{Globus Toolkit 4 -- Programming Java Services}},
+ Year = {2005}}
+
+ at misc{axis,
+ Date-Added = {2008-05-25 20:11:06 +0200},
+ Date-Modified = {2008-05-25 20:11:46 +0200},
+ Howpublished = {\url{http://ws.apache.org/axis/}},
+ Key = {axis},
+ Title = {{Axis Web Services}},
+ Year = {2006}}
+
+ at inproceedings{1194624,
+ Address = {Washington, DC, USA},
+ Author = {I. Foster},
+ Booktitle = {IAT '06: Proceedings of the IEEE/WIC/ACM international conference on Intelligent Agent Technology},
+ Date-Added = {2008-05-25 18:59:14 +0200},
+ Date-Modified = {2008-05-25 18:59:26 +0200},
+ Doi = {http://dx.doi.org/10.1109/IAT.2006.110},
+ Isbn = {0-7695-2748-5},
+ Pages = {9--10},
+ Publisher = {IEEE Computer Society},
+ Title = {{Service-Oriented Science: Scaling eScience Impact}},
+ Year = {2006},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/IAT.2006.110}}
+
+ at misc{Roehrig:2002ad,
+ Author = {M. Roehrig and W. Ziegler and P. Wieder},
+ Date-Added = {2008-05-25 17:14:37 +0200},
+ Date-Modified = {2008-05-25 17:15:35 +0200},
+ Howpublished = {Open Grid Forum Document GFD.11},
+ Note = {Open Grid Forum},
+ Title = {{Grid Scheduling Dictionary of Terms and Keywords}},
+ Year = 2002}
+
+ at misc{Stokes:2007bs,
+ Author = {E. Stokes},
+ Date-Added = {2008-05-25 14:12:55 +0200},
+ Date-Modified = {2008-05-25 16:44:41 +0200},
+ Howpublished = {Open Grid Forum Document GFD.119},
+ Note = {Open Grid Forum},
+ Title = {{Execution Environment and Basic Execution Service Model in OGSA Grids}},
+ Year = 2007}
+
+ at misc{Andrieux:2007sj,
+ Author = {A. Andrieux and K. Czajkowski and A. Dan and K. Keahey and H. Ludwig and T. Nakata and J. Pruyne and J. Rofrano and S. Tuecke and M. Xu},
+ Date-Added = {2008-05-25 13:57:11 +0200},
+ Date-Modified = {2008-05-25 14:15:14 +0200},
+ Howpublished = {Open Grid Forum Document GFD.107},
+ Note = {Open Grid Forum},
+ Title = {{Web Services Agreement Specification (WS-Agreement)}},
+ Year = 2007}
+
+ at article{1063213,
+ Address = {Riverton, NJ, USA},
+ Author = {J. Joseph and M. Ernest and C. Fellenstein},
+ Date-Added = {2008-05-25 12:39:50 +0200},
+ Date-Modified = {2008-05-25 12:40:22 +0200},
+ Issn = {0018-8670},
+ Journal = {IBM Systems Journal },
+ Number = {4},
+ Pages = {624--645},
+ Publisher = {IBM Corp.},
+ Title = {{Evolution of Grid Computing Architecture and Grid Adoption Models}},
+ Volume = {43},
+ Year = {2004}}
+
+ at article{1132955,
+ Address = {New York, NY, USA},
+ Author = {S. Venugopal and R. Buyya and K. Ramamohanarao},
+ Date-Added = {2008-05-25 12:34:32 +0200},
+ Date-Modified = {2008-05-25 12:35:52 +0200},
+ Doi = {http://doi.acm.org/10.1145/1132952.1132955},
+ Issn = {0360-0300},
+ Journal = {ACM Computing Surveys},
+ Number = {1},
+ Pages = {3},
+ Publisher = {ACM},
+ Title = {{A Taxonomy of Data Grids for Distributed Data Sharing, Management, and Processing}},
+ Volume = {38},
+ Year = {2006},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/1132952.1132955}}
+
+ at article{DBLP:journals/corr/cs-NI-0403019,
+ Author = {J. Gray},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-05-24 18:52:28 +0200},
+ Date-Modified = {2008-06-18 19:17:01 +0200},
+ Ee = {http://arxiv.org/abs/cs.NI/0403019},
+ Journal = {Computer Research Repository (CoRR) },
+ Title = {{Distributed Computing Economics}},
+ Volume = {cs.NI/0403019},
+ Year = {2004}}
+
+ at article{1239657,
+ Address = {Amsterdam, The Netherlands},
+ Author = {K. Keahey and I. Foster and T. Freeman and X. Zhang},
+ Date-Added = {2008-05-24 14:39:44 +0200},
+ Date-Modified = {2008-05-24 14:40:40 +0200},
+ Issn = {1058-9244},
+ Journal = {Scientific Programming},
+ Number = {4},
+ Pages = {265--275},
+ Publisher = {IOS Press},
+ Title = {{Virtual Workspaces: Achieving Quality of Service and Quality of Life in the Grid}},
+ Volume = {13},
+ Year = {2005}}
+
+ at misc{gartnercloud,
+ Booktitle = {Gartner Emerging Trends Symposium/ITxpo},
+ Date-Added = {2008-05-24 14:33:43 +0200},
+ Date-Modified = {2008-05-24 14:35:31 +0200},
+ Howpublished = {\url{http://www.gartner.com/it/page.jsp?id=640909}},
+ Key = {Gartner},
+ Title = {{Gartner Says IT Leaders Should Prepare for the Third Wave of Innovation to Drive Growth}},
+ Year = {2008}}
+
+ at misc{Lindsay:2007qp,
+ Author = {R. Lindsay},
+ Date-Added = {2008-05-24 14:24:23 +0200},
+ Date-Modified = {2008-05-24 14:25:15 +0200},
+ Howpublished = {\url{http://business.timesonline.co.uk/tol/business/markets/article2828050.ece}},
+ Title = {Dealers in the Dark after Systems Failure},
+ Year = {2007}}
+
+ at inproceedings{Palankar:2007wj,
+ Address = {Cambridge, MA, USA},
+ Author = {M. Palankar and A. Onibokun and Adriana Iamnitchi},
+ Booktitle = {Proceedings of 4th USENIX Symposium on Networked Systems Design \& Implementation},
+ Date-Added = {2008-05-24 13:42:49 +0200},
+ Date-Modified = {2008-05-24 13:45:03 +0200},
+ Title = {{Amazon S3 for Science Grids: A Viable Solution?}},
+ Year = {2007}}
+
+ at inproceedings{945462,
+ Address = {New York, NY, USA},
+ Author = {P. Barham and B. Dragovic and K. Fraser and S. Hand and T. Harris and A. Ho and R. Neugebauer and I. Pratt and A. Warfield},
+ Booktitle = {SOSP '03: Proceedings of the nineteenth ACM symposium on Operating systems principles},
+ Date-Added = {2008-05-24 13:38:00 +0200},
+ Date-Modified = {2008-06-29 17:10:00 +0200},
+ Doi = {http://doi.acm.org/10.1145/945445.945462},
+ Isbn = {1-58113-757-5},
+ Location = {Bolton Landing, NY, USA},
+ Pages = {164--177},
+ Publisher = {ACM},
+ Title = {{Xen and the Art of Virtualization}},
+ Year = {2003},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/945445.945462}}
+
+ at inproceedings{Montero:2008ph,
+ Author = {R. S. Montero},
+ Booktitle = {{Talk at Open Source Grid \& Cluster Conference}},
+ Date-Added = {2008-05-24 13:28:23 +0200},
+ Date-Modified = {2008-05-24 21:15:35 +0200},
+ Howpublished = {\url{http://www.opensourcegridcluster.org/documents/Nebula.pdf}},
+ Title = {{OpenNebula: Open Source Virtual Machine Manager for Cluster Computing}},
+ Year = {2008}}
+
+ at inproceedings{Wolski:2008gd,
+ Author = {R. Wolski},
+ Booktitle = {{Talk at Open Source Grid \& Cluster Conference}},
+ Date-Added = {2008-05-24 13:22:00 +0200},
+ Date-Modified = {2008-05-24 21:15:41 +0200},
+ Howpublished = {\url{http://www.opensourcegridcluster.org/documents/RWoleski-eucalyptus-08.ppt}},
+ Title = {{EUCALYPTUS: An Open Source Service Infrastructure for Elastic Computing Research}},
+ Year = {2008}}
+
+ at misc{amazons3,
+ Date-Added = {2008-05-24 12:17:23 +0200},
+ Date-Modified = {2008-05-24 14:47:24 +0200},
+ Howpublished = {\url{http://s3.amazonaws.com}},
+ Key = {Amazon S3},
+ Title = {{Amazon S3 Web Service}},
+ Year = {2008}}
+
+ at misc{amazonec2,
+ Date-Added = {2008-05-24 12:16:25 +0200},
+ Date-Modified = {2008-05-29 14:42:18 +0200},
+ Howpublished = {\url{http://ec2.amazonaws.com}},
+ Key = {Amazon EC2},
+ Title = {{Amazon EC$^2$ Web Service}},
+ Year = {2008}}
+
+ at misc{egee,
+ Date-Added = {2008-05-24 00:05:33 +0200},
+ Date-Modified = {2008-05-24 00:06:11 +0200},
+ Howpublished = {{\url{http://public.eu-egee.org/}}},
+ Key = {EGEE},
+ Title = {{{EGEE -- European Grid Infrastructure}}},
+ Year = {{2008}}}
+
+ at misc{teragrid,
+ Date-Added = {2008-05-24 00:03:39 +0200},
+ Date-Modified = {2008-05-24 00:05:18 +0200},
+ Howpublished = {{\url{http://www.teragrid.org/}}},
+ Key = {TeraGrid},
+ Title = {{{TeraGrid -- American Grid Infrastructure}}},
+ Year = {{2008}}}
+
+ at article{2008ApJ...678..621K,
+ Adsnote = {Provided by the SAO/NASA Astrophysics Data System},
+ Adsurl = {http://esoads.eso.org/abs/2008ApJ...678..621K},
+ Author = {{Knebe}, A. and {Power}, C.},
+ Date-Added = {2008-05-23 17:54:13 +0200},
+ Date-Modified = {2008-05-23 17:54:42 +0200},
+ Doi = {10.1086/586702},
+ Eprint = {arXiv:0801.4453},
+ Journal = {The Astrophysical Journal},
+ Keywords = {Cosmology: Theory, Cosmology: Early Universe, Galaxies: Formation, Methods: Numerical},
+ Month = may,
+ Pages = {621--626},
+ Title = {{On the Correlation between Spin Parameter and Halo Mass}},
+ Volume = 678,
+ Year = 2008,
+ Bdsk-Url-1 = {http://dx.doi.org/10.1086/586702}}
+
+ at misc{Roblitz:2008ca,
+ Author = {Thomas R\"oblitz and Alexander Knebe},
+ Date-Added = {2008-05-23 17:33:57 +0200},
+ Date-Modified = {2008-05-23 17:54:08 +0200},
+ Howpublished = {\url{http://www.gac-grid.de/project-documents/UseCases/amiga.pdf}},
+ Title = {AMIGA Astrogrid-D Use Case Document},
+ Year = {2008}}
+
+ at article{hansmann,
+ Author = {U.H.E. Hansmann},
+ Journal = {Chemical Physics Letters},
+ Date-Added = {2008-05-23 15:29:39 +0200},
+ Date-Modified = {2008-05-23 15:29:39 +0200},
+ Pages = {140--150},
+ Title = {{Parallel Tempering Algorithm for Conformational Studies of Biological Molecules}},
+ Volume = 281,
+ Year = 1997}
+
+ at inproceedings{Schneidenbach:2008cq,
+ Author = {L. Schneidenbach and D. B\"ohme and B. Schnor},
+ Booktitle = {Proceedings of the 15th EuroPVM/MPI},
+ Date-Added = {2008-05-23 13:54:58 +0200},
+ Date-Modified = {2008-05-24 18:57:00 +0200},
+ Title = {{Performance Issues of Synchronisation in the MPI-2 One-Sided Communication API}},
+ Year = {to appear 2008}}
+
+ at book{NKS02,
+ Address = {Champaign, Ill},
+ Author = {Wolfram, Stephen },
+ Citeulike-Article-Id = {380389},
+ Date-Added = {2008-05-23 12:57:31 +0200},
+ Date-Modified = {2008-05-23 12:57:45 +0200},
+ Keywords = {automata, bibtex-import, cellular, cognition, complexity, computational, recursive, sequences, ucl},
+ Posted-At = {2005-11-04 11:50:58},
+ Priority = {2},
+ Publisher = {Wolfram Media},
+ Title = {{A New Kind of Science}},
+ Url = {http://www.wolframscience.com/nksonline/toc.html},
+ Year = {2002},
+ Bdsk-Url-1 = {http://www.wolframscience.com/nksonline/toc.html}}
+
+ at proceedings{DBLP:conf/pvm/2005,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {PVM/MPI},
+ Date-Added = {2008-05-23 12:43:01 +0200},
+ Date-Modified = {2008-05-23 12:43:01 +0200},
+ Editor = {Beniamino Di Martino and Dieter Kranzlm{\"u}ller and Jack Dongarra},
+ Isbn = {3-540-29009-5},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Recent Advances in Parallel Virtual Machine and Message Passing Interface, 12th European PVM/MPI Users' Group Meeting, Sorrento, Italy, September 18-21, 2005, Proceedings},
+ Volume = {3666},
+ Year = {2005}}
+
+ at inproceedings{DBLP:conf/pvm/KomannKF05,
+ Author = {Marcus Komann and Christian Kauhaus and Dietmar Fey},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Proceedings of the 12th EuroPVM/MPI},
+ Date-Added = {2008-05-23 12:42:54 +0200},
+ Date-Modified = {2008-05-24 21:14:01 +0200},
+ Ee = {http://dx.doi.org/10.1007/11557265_67},
+ Pages = {528--535},
+ Title = {{Calculation of Single-File Diffusion Using Grid-Enabled Parallel Generic Cellular Automata Simulation}},
+ Year = {2005}}
+
+ at article{Gardner:1970lk,
+ Author = {Martin Gardner},
+ Date-Added = {2008-05-23 12:08:10 +0200},
+ Date-Modified = {2008-05-23 12:10:06 +0200},
+ Journal = {Scientific American},
+ Pages = {120--123},
+ Title = {{Mathematical Games: The Fantastic Combinations of John Conway's new Solitaire Game ``life''}},
+ Volume = {223},
+ Year = {1970}}
+
+ at book{1102024,
+ Address = {Champaign, IL, USA},
+ Author = {John Von Neumann},
+ Date-Added = {2008-05-23 11:50:13 +0200},
+ Date-Modified = {2008-05-23 11:52:15 +0200},
+ Editor = {Arthur W. Burks},
+ Publisher = {University of Illinois Press},
+ Title = {{Theory of Self-Reproducing Automata}},
+ Year = {1966}}
+
+ at techreport{Asanovic:EECS-2006-183,
+ Author = {Asanovic, K. and Bodik, R. and Catanzaro, B. and Gebis, J. and Husbands, P. and Keutzer, K. and Patterson, D. and Plishker, W. and Shalf, J. and Williams, S. and Yelick, K.},
+ Date-Added = {2008-05-22 21:12:44 +0200},
+ Date-Modified = {2008-07-20 14:16:58 +0200},
+ Institution = {EECS Department, University of California, Berkeley},
+ Month = {Dec},
+ Number = {UCB/EECS-2006-183},
+ Title = {{The Landscape of Parallel Computing Research: A View from Berkeley}},
+ Url = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2006/EECS-2006-183.html},
+ Year = {2006},
+ Bdsk-Url-1 = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2006/EECS-2006-183.html}}
+
+ at article{10.1109/IPDPS.2007.370551,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Satish Penmatsa and Anthony T. Chronopoulos and Nicholas T. Karonis and Brian R. Toonen},
+ Date-Added = {2008-05-22 18:40:14 +0200},
+ Date-Modified = {2008-05-22 18:41:15 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/IPDPS.2007.370551},
+ Isbn = {1-4244-0909-8},
+ Journal = {Proceedings of the IEEE International Parallel and Distributed Processing Symposium },
+ Pages = {361},
+ Publisher = {IEEE Computer Society},
+ Title = {{Implementation of Distributed Loop Scheduling Schemes on the TeraGrid} },
+ Year = {2007},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/IPDPS.2007.370551}}
+
+ at misc{Toonen:2008ao,
+ Author = {Brian R. Toonen},
+ Date-Added = {2008-05-22 18:29:45 +0200},
+ Date-Modified = {2008-05-22 18:40:12 +0200},
+ Howpublished = {\url{http://wiki.ngs.ac.uk/index.php?title=MPIg}},
+ Organization = {National Grid Service},
+ Title = {MPIg Homepage},
+ Year = {2008}}
+
+ at inproceedings{748452,
+ Address = {London, UK},
+ Author = {Thomas Beisel and Edgar Gabriel and Michael Resch},
+ Booktitle = {Proceedings of the 4th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface},
+ Date-Added = {2008-05-22 18:26:24 +0200},
+ Date-Modified = {2008-05-22 18:26:32 +0200},
+ Isbn = {3-540-63697-8},
+ Pages = {75--82},
+ Publisher = {Springer-Verlag},
+ Title = {{An Extension to MPI for Distributed Computing on MPPs}},
+ Year = {1997}}
+
+ at inproceedings{93399,
+ Address = {New York, NY, USA},
+ Author = {Rivka Ladin and Barbara Liskov and Liuba Shrira},
+ Booktitle = {PODC '90: Proceedings of the ninth annual ACM symposium on Principles of distributed computing},
+ Date-Added = {2008-05-22 13:18:23 +0200},
+ Date-Modified = {2008-05-22 13:18:40 +0200},
+ Doi = {http://doi.acm.org/10.1145/93385.93399},
+ Isbn = {0-89791-404-X},
+ Location = {Quebec City, Quebec, Canada},
+ Pages = {43--57},
+ Publisher = {ACM},
+ Title = {{Lazy Replication: Exploiting the Semantics of Distributed Services}},
+ Year = {1990},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/93385.93399}}
+
+ at article{227229,
+ Address = {New York, NY, USA},
+ Author = {R. van Renesse and K. Birman and S. Maffeis},
+ Date-Added = {2008-05-22 12:40:38 +0200},
+ Date-Modified = {2008-06-28 14:29:37 +0200},
+ Doi = {http://doi.acm.org/10.1145/227210.227229},
+ Issn = {0001-0782},
+ Journal = {Communications of the ACM},
+ Number = {4},
+ Pages = {76--83},
+ Publisher = {ACM},
+ Title = {{Horus: A Flexible Group Communication System}},
+ Volume = {39},
+ Year = {1996},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/227210.227229}}
+
+ at inproceedings{1251262,
+ Address = {Berkeley, CA, USA},
+ Author = {John MacCormick and Nick Murphy and Marc Najork and Chandramohan A. Thekkath and Lidong Zhou},
+ Booktitle = {OSDI'04: Proceedings of the 6th conference on Symposium on Operating Systems Design \& Implementation},
+ Date-Added = {2008-05-22 12:31:07 +0200},
+ Date-Modified = {2008-05-22 12:31:26 +0200},
+ Location = {San Francisco, CA},
+ Pages = {8--8},
+ Publisher = {USENIX Association},
+ Title = {{Boxwood: Abstractions as the Foundation for Storage Infrastructure}},
+ Year = {2004}}
+
+ at article{5508,
+ Address = {New York, NY, USA},
+ Author = {Susan B. Davidson and Hector Garcia-Molina and Dale Skeen},
+ Date-Added = {2008-05-21 19:37:42 +0200},
+ Date-Modified = {2008-05-21 19:38:00 +0200},
+ Doi = {http://doi.acm.org/10.1145/5505.5508},
+ Issn = {0360-0300},
+ Journal = {ACM Comput. Surv.},
+ Number = {3},
+ Pages = {341--370},
+ Publisher = {ACM},
+ Title = {{Consistency in a Partitioned Network: A Survey}},
+ Volume = {17},
+ Year = {1985},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/5505.5508}}
+
+ at inproceedings{1267332,
+ Address = {Berkeley, CA, USA},
+ Author = {M. Burrows},
+ Booktitle = {USENIX'06: Proceedings of the 7th conference on USENIX Symposium on Operating Systems Design and Implementation},
+ Date-Added = {2008-05-21 15:59:01 +0200},
+ Date-Modified = {2008-06-29 00:23:12 +0200},
+ Location = {Seattle, WA},
+ Pages = {24--24},
+ Publisher = {USENIX Association},
+ Title = {{The Chubby Lock Service for Loosely-Coupled Distributed Systems}},
+ Year = {2006}}
+
+ at inproceedings{806583,
+ Address = {New York, NY, USA},
+ Author = {David K. Gifford},
+ Booktitle = {SOSP '79: Proceedings of the seventh ACM symposium on Operating systems principles},
+ Date-Added = {2008-05-21 14:55:50 +0200},
+ Date-Modified = {2008-05-21 14:56:06 +0200},
+ Doi = {http://doi.acm.org/10.1145/800215.806583},
+ Isbn = {0-89791-009-5},
+ Location = {Pacific Grove, California, United States},
+ Pages = {150--162},
+ Publisher = {ACM},
+ Title = {{Weighted Voting for Replicated Data}},
+ Year = {1979},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/800215.806583}}
+
+ at article{lamport01paxos,
+ Author = {Lamport, L.},
+ Citeulike-Article-Id = {541278},
+ Date-Added = {2008-05-21 14:51:51 +0200},
+ Date-Modified = {2008-06-28 11:58:45 +0200},
+ Journal = {ACM SIGACT News},
+ Keywords = {bibtex-import},
+ Month = {December},
+ Number = {4},
+ Pages = {18--25},
+ Posted-At = {2006-03-09 04:35:31},
+ Priority = {2},
+ Title = {{{P}axos Made Simple}},
+ Volume = {32},
+ Year = {2001}}
+
+ at article{279229,
+ Address = {New York, NY, USA},
+ Author = {L. Lamport},
+ Date-Added = {2008-05-21 14:25:46 +0200},
+ Date-Modified = {2008-06-28 11:57:51 +0200},
+ Doi = {http://doi.acm.org/10.1145/279227.279229},
+ Issn = {0734-2071},
+ Journal = {ACM Transactions on Computer Systems},
+ Number = {2},
+ Pages = {133--169},
+ Publisher = {ACM},
+ Title = {{The Part-Time Parliament}},
+ Volume = {16},
+ Year = {1998},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/279227.279229}}
+
+ at article{322158,
+ Address = {New York, NY, USA},
+ Author = {Christos H. Papadimitriou},
+ Date-Added = {2008-05-21 13:53:13 +0200},
+ Date-Modified = {2008-05-21 13:53:26 +0200},
+ Doi = {http://doi.acm.org/10.1145/322154.322158},
+ Issn = {0004-5411},
+ Journal = {J. ACM},
+ Number = {4},
+ Pages = {631--653},
+ Publisher = {ACM},
+ Title = {The Serializability of Concurrent Database Updates},
+ Volume = {26},
+ Year = {1979},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/322154.322158}}
+
+ at article{10.1109/TC.1979.1675439,
+ Address = {Los Alamitos, CA, USA},
+ Author = {L. Lamport},
+ Date-Added = {2008-05-21 13:34:50 +0200},
+ Date-Modified = {2008-05-21 13:34:59 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/TC.1979.1675439},
+ Issn = {0018-9340},
+ Journal = {IEEE Transactions on Computers},
+ Number = {9},
+ Pages = {690--691},
+ Publisher = {IEEE Computer Society},
+ Title = {{How to Make a Multiprocessor Computer That Correctly Executes Multiprocess Programs}},
+ Volume = {28},
+ Year = {1979},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/TC.1979.1675439}}
+
+ at article{10.1109/TC.1982.1675885,
+ Address = {Los Alamitos, CA, USA},
+ Author = {H. Garcia-Molina},
+ Date-Added = {2008-05-21 12:47:14 +0200},
+ Date-Modified = {2008-05-21 12:47:31 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/TC.1982.1675885},
+ Issn = {0018-9340},
+ Journal = {IEEE Transactions on Computers},
+ Number = {1},
+ Pages = {48--59},
+ Publisher = {IEEE Computer Society},
+ Title = {{Elections in a Distributed Computing System}},
+ Volume = {31},
+ Year = {1982},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/TC.1982.1675885}}
+
+ at article{359563,
+ Address = {New York, NY, USA},
+ Author = {L. Lamport},
+ Date-Added = {2008-05-21 11:09:06 +0200},
+ Date-Modified = {2008-06-28 11:58:17 +0200},
+ Doi = {http://doi.acm.org/10.1145/359545.359563},
+ Issn = {0001-0782},
+ Journal = {Communications of the ACM},
+ Number = {7},
+ Pages = {558--565},
+ Publisher = {ACM},
+ Title = {{Time, Clocks, and the Ordering of Events in a Distributed System}},
+ Volume = {21},
+ Year = {1978},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/359545.359563}}
+
+ at article{320076,
+ Address = {New York, NY, USA},
+ Author = {R. Thomas},
+ Date-Added = {2008-05-21 09:55:45 +0200},
+ Date-Modified = {2008-06-28 21:27:43 +0200},
+ Doi = {http://doi.acm.org/10.1145/320071.320076},
+ Issn = {0362-5915},
+ Journal = {ACM Transactions on Database Systems},
+ Number = {2},
+ Pages = {180--209},
+ Publisher = {ACM},
+ Title = {{A Majority Consensus Approach to Concurrency Control for Multiple Copy Databases}},
+ Volume = {4},
+ Year = {1979},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/320071.320076}}
+
+ at article{10.1109/C-M.1975.218955,
+ Address = {Los Alamitos, CA, USA},
+ Author = {K. Chandy},
+ Date-Added = {2008-05-21 09:17:57 +0200},
+ Date-Modified = {2008-05-25 23:16:25 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/C-M.1975.218955},
+ Issn = {0018-9162},
+ Journal = {Computer},
+ Number = {5},
+ Pages = {40--47},
+ Publisher = {IEEE Computer Society},
+ Title = {{A Survey of Analytic Models of Rollback and Recovery Strategies}},
+ Volume = {8},
+ Year = {1975},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/C-M.1975.218955}}
+
+ at techreport{866215,
+ Address = {Ithaca, NY, USA},
+ Author = {K. P. Birman and T. A. Joseph},
+ Date-Added = {2008-05-20 22:32:01 +0200},
+ Date-Modified = {2008-06-16 10:30:41 +0200},
+ Institution = {Cornell University},
+ Source = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Acornellcs%3ACORNELLCS%3ATR87-811},
+ Title = {{Exploiting Virtual Synchrony in Distributed Systems}},
+ Year = {1987}}
+
+ at article{7478,
+ Address = {New York, NY, USA},
+ Author = {K. P. Birman and T. A. Joseph},
+ Date-Added = {2008-05-20 21:53:09 +0200},
+ Date-Modified = {2008-06-16 10:31:29 +0200},
+ Doi = {http://doi.acm.org/10.1145/7351.7478},
+ Issn = {0734-2071},
+ Journal = {ACM Transactions on Computer Systems},
+ Number = {1},
+ Pages = {47--76},
+ Publisher = {ACM},
+ Title = {{Reliable Communication in the Presence of Failures}},
+ Volume = {5},
+ Year = {1987},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/7351.7478}}
+
+ at article{163303,
+ Address = {New York, NY, USA},
+ Author = {K. P. Birman},
+ Date-Added = {2008-05-20 21:51:35 +0200},
+ Date-Modified = {2008-06-16 10:31:19 +0200},
+ Doi = {http://doi.acm.org/10.1145/163298.163303},
+ Issn = {0001-0782},
+ Journal = {Communications of the ACM},
+ Number = {12},
+ Pages = {37--53},
+ Publisher = {ACM},
+ Title = {{The Process Group Approach to Reliable Distributed Computing}},
+ Volume = {36},
+ Year = {1993},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/163298.163303}}
+
+ at techreport{Jason03BLCR,
+ Author = {Jason Duell},
+ Citeulike-Article-Id = {1084430},
+ Date-Added = {2008-05-20 15:23:24 +0200},
+ Date-Modified = {2008-05-20 15:24:06 +0200},
+ Institution = {Lawrence Berkeley National Laboratory},
+ Keywords = {blcr, checkpoint, linux, restart},
+ Month = {November},
+ Posted-At = {2007-02-02 15:07:21},
+ Priority = {0},
+ Title = {{The Design and Implementation of Berkeley Lab's Linux Checkpoint/Restart}},
+ Year = {2003}}
+
+ at article{568525,
+ Address = {New York, NY, USA},
+ Author = {Elmootazbellah N. Elnozahy and Lorenzo Alvisi and Yi-Min Wang and David B. Johnson},
+ Date-Added = {2008-05-20 10:42:22 +0200},
+ Date-Modified = {2008-05-20 10:43:57 +0200},
+ Doi = {http://doi.acm.org/10.1145/568522.568525},
+ Issn = {0360-0300},
+ Journal = {ACM Computing Surveys},
+ Number = {3},
+ Pages = {375--408},
+ Publisher = {ACM},
+ Title = {{A Survey of Rollback-Recovery Protocols in Message-Passing Systems}},
+ Volume = {34},
+ Year = {2002},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/568522.568525}}
+
+ at article{1032593,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Elmootazbellah N. Elnozahy and James S. Plank},
+ Date-Added = {2008-05-20 10:14:34 +0200},
+ Date-Modified = {2008-05-20 10:16:00 +0200},
+ Doi = {http://dx.doi.org/10.1109/TDSC.2004.15},
+ Issn = {1545-5971},
+ Journal = {IEEE Transactions on Dependable and Secure Computing},
+ Number = {2},
+ Pages = {97--108},
+ Publisher = {IEEE Computer Society Press},
+ Title = {{Checkpointing for Peta-Scale Systems: A Look into the Future of Practical Rollback-Recovery}},
+ Volume = {1},
+ Year = {2004},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/TDSC.2004.15}}
+
+ at article{214456,
+ Address = {New York, NY, USA},
+ Author = {K. Chandy and L. Lamport},
+ Date-Added = {2008-05-20 10:00:59 +0200},
+ Date-Modified = {2008-05-25 23:15:35 +0200},
+ Doi = {http://doi.acm.org/10.1145/214451.214456},
+ Issn = {0734-2071},
+ Journal = {ACM Transactions on Computer Systems },
+ Number = {1},
+ Pages = {63--75},
+ Publisher = {ACM},
+ Title = {{Distributed Snapshots: Determining Global States of Distributed Systems}},
+ Volume = {3},
+ Year = {1985},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/214451.214456}}
+
+ at inproceedings{62575,
+ Address = {New York, NY, USA},
+ Author = {David B. Johnson and Willy Zwaenepoel},
+ Booktitle = {PODC '88: Proceedings of the seventh annual ACM Symposium on Principles of distributed computing},
+ Date-Added = {2008-05-19 23:46:53 +0200},
+ Date-Modified = {2008-05-19 23:47:20 +0200},
+ Doi = {http://doi.acm.org/10.1145/62546.62575},
+ Isbn = {0-89791-277-2},
+ Location = {Toronto, Ontario, Canada},
+ Pages = {171--181},
+ Publisher = {ACM},
+ Title = {{Recovery in Distributed Systems using Asynchronous Message Logging and Checkpointing}},
+ Year = {1988},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/62546.62575}}
+
+ at inproceedings{233330,
+ Address = {New York, NY, USA},
+ Author = {J. Gray and P. Helland and P. O'Neil and D. Shasha},
+ Booktitle = {SIGMOD '96: Proceedings of the 1996 ACM SIGMOD international conference on Management of data},
+ Date-Added = {2008-05-19 16:15:02 +0200},
+ Date-Modified = {2008-05-24 18:53:15 +0200},
+ Doi = {http://doi.acm.org/10.1145/233269.233330},
+ Isbn = {0-89791-794-4},
+ Location = {Montreal, Quebec, Canada},
+ Pages = {173--182},
+ Publisher = {ACM},
+ Title = {{The Dangers of Replication and a Solution}},
+ Year = {1996},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/233269.233330}}
+
+ at article{10.1109/2.84898,
+ Address = {Los Alamitos, CA, USA},
+ Author = {J. Gray and D. Siewiorek},
+ Date-Added = {2008-05-19 13:57:18 +0200},
+ Date-Modified = {2008-05-24 18:52:58 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/2.84898},
+ Issn = {0018-9162},
+ Journal = {Computer},
+ Number = {9},
+ Pages = {39--48},
+ Publisher = {IEEE Computer Society},
+ Title = {High-Availability Computer Systems},
+ Volume = {24},
+ Year = {1991},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/2.84898}}
+
+ at article{Gray:1190xi,
+ Author = {J. Gray },
+ Date-Added = {2008-05-19 13:57:14 +0200},
+ Date-Modified = {2008-05-24 18:53:40 +0200},
+ Journal = {IEEE Transactions on Reliability},
+ Number = {4},
+ Pages = {409--418},
+ Title = {{A Census of Tandem System Availability Between 1985 and 1990}},
+ Volume = {39},
+ Year = {1990},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/24.58719}}
+
+ at inproceedings{DBLP:conf/ifip/AvizienisLR04,
+ Author = {A. Avi\v{z}ienis and J.-C. Laprie and B. Randell},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {IFIP Congress Topical Sessions},
+ Crossref = {DBLP:conf/ifip/2004bis},
+ Date-Added = {2008-05-19 13:05:25 +0200},
+ Date-Modified = {2008-06-20 00:05:56 +0200},
+ Pages = {91--120},
+ Title = {{Dependability and its Threats - A Taxonomy}},
+ Year = {2004}}
+
+ at proceedings{DBLP:conf/ifip/2004bis,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {IFIP Congress Topical Sessions},
+ Date-Added = {2008-05-19 13:05:18 +0200},
+ Date-Modified = {2008-05-19 13:05:18 +0200},
+ Editor = {Ren{\'e} Jacquart},
+ Isbn = {1-4020-8156-1},
+ Publisher = {Kluwer},
+ Title = {Building the Information Society, IFIP 18th World Computer Congress, Topical Sessions, 22-27 August 2004, Toulouse, France},
+ Year = {2004}}
+
+ at article{DBLP:journals/tc/Avizinis76,
+ Author = {A. Avi\v{z}ienis},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-05-19 11:41:37 +0200},
+ Date-Modified = {2008-06-20 09:41:28 +0200},
+ Journal = {IEEE Transactions on Computers},
+ Number = {12},
+ Pages = {1304--1312},
+ Title = {{Fault-Tolerant Systems}},
+ Volume = {25},
+ Year = {1976}}
+
+ at inproceedings{200939,
+ Address = {London, UK},
+ Author = {P. A. Lee},
+ Booktitle = {Workshop on Hardware and Software Architectures for Fault Tolerance: Experiences and Perspectives},
+ Date-Added = {2008-05-19 10:27:09 +0200},
+ Date-Modified = {2008-05-20 09:11:38 +0200},
+ Isbn = {0-387-57767-X},
+ Location = {Le Mont Saint Michel, France},
+ Pages = {171--181},
+ Publisher = {Springer-Verlag},
+ Title = {{Software-Faults: The Remaining Problem in Fault Tolerant Systems?}},
+ Year = {1994}}
+
+ at article{10.1109/32.210303,
+ Address = {Los Alamitos, CA, USA},
+ Author = {R.W. Butler and G.B. Finelli},
+ Date-Added = {2008-05-19 08:44:50 +0200},
+ Date-Modified = {2008-05-19 08:44:58 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/32.210303},
+ Issn = {0098-5589},
+ Journal = {IEEE Transactions on Software Engineering},
+ Number = {1},
+ Pages = {3--12},
+ Publisher = {IEEE Computer Society},
+ Title = {{The Infeasibility of Quantifying the Reliability of Life-Critical Real-Time Software}},
+ Volume = {19},
+ Year = {1993},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/32.210303}}
+
+ at article{DBLP:journals/computer/Chou97,
+ Author = {T. Chou},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-05-19 08:34:16 +0200},
+ Date-Modified = {2008-06-08 20:14:32 +0200},
+ Journal = {IEEE Computer},
+ Number = {4},
+ Pages = {47--49},
+ Title = {{Beyond Fault Tolerance}},
+ Volume = {30},
+ Year = {1997}}
+
+ at inproceedings{807732,
+ Address = {Los Alamitos, CA, USA},
+ Author = {P. Alsberg and J. Day},
+ Booktitle = {ICSE '76: Proceedings of the 2nd International Conference on Software Engineering},
+ Date-Added = {2008-05-18 23:48:49 +0200},
+ Date-Modified = {2008-07-20 14:15:34 +0200},
+ Location = {San Francisco, California, United States},
+ Pages = {562--570},
+ Publisher = {IEEE Computer Society Press},
+ Title = {{A Principle for Resilient Sharing of Distributed Resources}},
+ Year = {1976}}
+
+ at incollection{302438,
+ Address = {New York, NY, USA},
+ Author = {N. Budhiraja and K. Marzullo and F. Schneider and S. Toueg},
+ Booktitle = {Distributed Systems (2nd Ed.)},
+ Date-Added = {2008-05-18 23:45:03 +0200},
+ Date-Modified = {2008-06-19 22:49:29 +0200},
+ Isbn = {0-201-62427-3},
+ Pages = {199--216},
+ Publisher = {ACM Press/Addison-Wesley Publishing Co.},
+ Title = {{The Primary-Backup Approach}},
+ Year = {1993}}
+
+ at article{227231,
+ Address = {New York, NY, USA},
+ Author = {Flaviu Cristian},
+ Date-Added = {2008-05-18 22:22:48 +0200},
+ Date-Modified = {2008-05-19 18:17:34 +0200},
+ Doi = {http://doi.acm.org/10.1145/227210.227231},
+ Issn = {0001-0782},
+ Journal = {Communications of the ACM},
+ Number = {4},
+ Pages = {88--97},
+ Publisher = {ACM},
+ Title = {{Synchronous and Asynchronous Group Communication}},
+ Volume = {39},
+ Year = {1996},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/227210.227231}}
+
+ at book{Jalote:1998le,
+ Address = {Upper Saddle River, New Jersey, USA},
+ Author = {P. Jalote},
+ Date-Added = {2008-05-18 21:55:53 +0200},
+ Date-Modified = {2008-05-25 23:16:04 +0200},
+ Publisher = {Prentice-Hall, Inc.},
+ Title = {{Fault Tolerance in Distributed Systems}},
+ Year = {1998}}
+
+ at techreport{mishra92abstractions,
+ Author = {S. Mishra and R. Schlichting},
+ Date-Added = {2008-05-18 21:54:52 +0200},
+ Date-Modified = {2008-05-18 21:55:01 +0200},
+ Number = {TR 92 -12},
+ Title = {{Abstractions for Constructing Dependable Distributed Systems}},
+ Url = {citeseer.ist.psu.edu/97395.html},
+ Year = {1992},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/97395.html}}
+
+ at inproceedings{808469,
+ Address = {New York, NY, USA},
+ Author = {A. Avi\v{z}ienis},
+ Booktitle = {Proceedings of the international conference on Reliable software},
+ Date-Added = {2008-05-18 17:51:18 +0200},
+ Date-Modified = {2008-06-20 09:41:22 +0200},
+ Doi = {http://doi.acm.org/10.1145/800027.808469},
+ Location = {Los Angeles, California},
+ Pages = {458--464},
+ Publisher = {ACM},
+ Title = {{Fault-Tolerance and Fault-Intolerance: Complementary Approaches to Reliable Computing}},
+ Year = {1975},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/800027.808469}}
+
+ at inproceedings{DBLP:conf/sigmod/PattersonGK88,
+ Author = {D. Patterson and G. Gibson and R. Katz},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {SIGMOD Conference},
+ Crossref = {DBLP:conf/sigmod/88},
+ Date-Added = {2008-05-18 17:39:58 +0200},
+ Date-Modified = {2008-06-16 10:35:35 +0200},
+ Ee = {http://doi.acm.org/10.1145/50202.50214, db/conf/sigmod/PattersonGK88.html},
+ Pages = {109--116},
+ Title = {{A Case for Redundant Arrays of Inexpensive Disks (RAID)}},
+ Year = {1988}}
+
+ at proceedings{DBLP:conf/sigmod/88,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-05-18 17:39:50 +0200},
+ Date-Modified = {2008-05-18 17:39:50 +0200},
+ Editor = {Haran Boral and Per-{\AA}ke Larson},
+ Publisher = {ACM Press},
+ Title = {Proceedings of the 1988 ACM SIGMOD International Conference on Management of Data, Chicago, Illinois, June 1-3, 1988},
+ Year = {1988}}
+
+ at incollection{huang95software,
+ Author = {Y. Huang and C. Kintala},
+ Booktitle = {Software Fault Tolerance},
+ Date-Added = {2008-05-18 14:19:35 +0200},
+ Date-Modified = {2008-06-08 18:40:41 +0200},
+ Editor = {M. R. Lyu},
+ Pages = {231--248},
+ Publisher = {John Wiley \& Sons},
+ Title = {{Software Fault-Tolerance in the Application Layer}},
+ Url = {citeseer.ist.psu.edu/huang95software.html},
+ Year = {1995},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/huang95software.html}}
+
+ at misc{oasis08,
+ Date-Added = {2008-05-17 21:51:38 +0200},
+ Date-Modified = {2008-05-17 21:52:25 +0200},
+ Howpublished = {\url{http://www.oasis-open.org/}},
+ Title = {{OASIS Consortium}},
+ Year = {2008}}
+
+ at book{WSARCH,
+ Date-Added = {2008-05-16 12:22:16 +0200},
+ Date-Modified = {2008-06-22 18:13:32 +0200},
+ Editor = {D. Booth and H. Haas and F. McCabe and E. Newcomer and M. Champion and C. Ferris and D. Orchard},
+ Howpublished = {\url{http://www.w3.org/TR/ws-arch/}},
+ Publisher = {W3C},
+ Title = {{Web Services Architecture}},
+ Year = {2004}}
+
+ at book{Clark:1999eq,
+ Author = {James Clark and Steve DeRose},
+ Date-Added = {2008-05-16 12:04:11 +0200},
+ Date-Modified = {2008-05-16 12:05:48 +0200},
+ Howpublished = {\url{http://www.w3.org/TR/xpath}},
+ Publisher = {W3C},
+ Title = {XML Path Language (XPath) Version 1.0},
+ Year = {1999}}
+
+ at book{Christensen:2001kl,
+ Author = {E. Christensen and F. Curbera and G. Meredith and S. Weerawarana},
+ Date-Added = {2008-05-16 11:53:03 +0200},
+ Date-Modified = {2008-06-28 11:51:34 +0200},
+ Howpublished = {\url{http://www.w3.org/TR/wsdl}},
+ Publisher = {W3C},
+ Title = {{Web Services Description Language (WSDL) 1.1}},
+ Year = {2001}}
+
+ at book{Banks:2006bs,
+ Author = {T. Banks},
+ Date-Added = {2008-05-16 11:44:30 +0200},
+ Date-Modified = {2008-07-20 14:18:10 +0200},
+ Howpublished = {\url{http://docs.oasis-open.org/wsrf/wsrf-primer-1.2-primer-cd-02.pdf}},
+ Publisher = {OASIS},
+ Title = {Web Services Resource Framework (WSRF) -- Primer v1.2},
+ Year = {2006}}
+
+ at inproceedings{DBLP:conf/gcc/XieT04,
+ Author = {Y. Xie and Y. Teo},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {GCC},
+ Crossref = {DBLP:conf/gcc/2004},
+ Date-Added = {2008-05-16 10:22:40 +0200},
+ Date-Modified = {2008-06-20 13:34:09 +0200},
+ Ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3251{\&}spage=17},
+ Pages = {17--25},
+ Title = {{State Management Issues and Grid Services}},
+ Year = {2004}}
+
+ at proceedings{DBLP:conf/gcc/2004,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {GCC},
+ Date-Added = {2008-05-16 10:22:31 +0200},
+ Date-Modified = {2008-06-20 13:35:05 +0200},
+ Editor = {H. Jin and Y. Pan and N. Xiao and J. Sun},
+ Isbn = {3-540-23564-7},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Grid and Cooperative Computing - GCC 2004: Third International Conference, Wuhan, China, October 21-24, 2004. Proceedings},
+ Volume = {3251},
+ Year = {2004}}
+
+ at article{Sugita:1999rm,
+ Author = {Sugita, Y. and Okamoto, Y.},
+ Journal = {Chemical Physics Letters},
+ Date-Added = {2008-05-15 22:16:01 +0200},
+ Date-Modified = {2008-05-15 22:16:01 +0200},
+ Pages = {141--151},
+ Title = {{Replica-Exchange Molecular Dynamics Method for Protein Folding}},
+ Volume = {314},
+ Year = {1999}}
+
+ at inproceedings{1025879,
+ Address = {Washington, DC, USA},
+ Author = {John C. S. Lui and Vishal Misra and Dan Rubenstein},
+ Booktitle = {ICNP '04: Proceedings of the Network Protocols, 12th IEEE International Conference},
+ Date-Added = {2008-05-15 17:41:29 +0200},
+ Date-Modified = {2008-05-15 17:41:29 +0200},
+ Isbn = {0-7695-2161-4},
+ Pages = {50--60},
+ Publisher = {IEEE Computer Society},
+ Title = {On the Robustness of Soft State Protocols},
+ Year = {2004}}
+
+ at inproceedings{52336,
+ Address = {New York, NY, USA},
+ Author = {D. Clark},
+ Booktitle = {SIGCOMM '88: Symposium proceedings on Communications architectures and protocols},
+ Date-Added = {2008-05-15 17:38:08 +0200},
+ Date-Modified = {2008-05-15 17:38:28 +0200},
+ Doi = {http://doi.acm.org/10.1145/52324.52336},
+ Isbn = {0-89791-279-9},
+ Location = {Stanford, California, United States},
+ Pages = {106--114},
+ Publisher = {ACM},
+ Title = {{The Design Philosophy of the DARPA Internet Protocols}},
+ Year = {1988},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/52324.52336}}
+
+ at article{Bogden:2007xq,
+ Author = {P. S. Bogden and T. Gale and G. Allen and J. MacLaren and G. Almes and G. Creager and J. Bintz and L. D. Wright and H. Graber and N. Williams and S. Graves and H. Conover and K. Galluppi and R. Luettich and W. Perrie and B. Toulany and Y. P. Sheng and J. R. Davis and H. Wang and D. Forrest},
+ Journal = {Marine Technology Society Journal},
+ Date-Added = {2008-05-14 15:22:34 +0200},
+ Date-Modified = {2008-05-14 15:26:44 +0200},
+ Number = {1},
+ Pages = {53--71},
+ Title = {{Architecture of a Community Infrastructure for Predicting and Analyzing Coastal Inundation}},
+ Volume = {41},
+ Year = {2007}}
+
+ at misc{citeulike:1561095,
+ Author = {O'Reilly, Tim },
+ Booktitle = {Perspectives on Free and Open Source Software},
+ Citeulike-Article-Id = {1561095},
+ Date-Added = {2008-05-14 13:06:53 +0200},
+ Date-Modified = {2008-05-20 09:13:41 +0200},
+ Keywords = {quals-commons},
+ Posted-At = {2007-08-14 22:53:32},
+ Priority = {2},
+ Title = {{Open Source Paradigm Shift}},
+ Url = {http://tim.oreilly.com/articles/paradigmshift\_0504.html},
+ Year = {2005},
+ Bdsk-Url-1 = {http://tim.oreilly.com/articles/paradigmshift%5C_0504.html}}
+
+ at manual{glite2008,
+ Author = {Stephen Burke and Simone Campana and M{\'e}ndez Lorenzo, Patricia and Christopher Nater and Roberto Santinelli and Andrea Sciab{\`a}},
+ Date-Added = {2008-05-13 16:43:52 +0200},
+ Date-Modified = {2008-05-13 16:48:25 +0200},
+ Howpublished = {\url{https://edms.cern.ch/file/722398/1.2/gLite-3-UserGuide.pdf}},
+ Organization = {EGEE/CERN},
+ Title = {{gLite 3.1 User Guide}},
+ Year = {2008}}
+
+ at techreport{Gusowski:2008rq,
+ Address = {Potsdam},
+ Author = {M. Gusowski and A. Luckow and B. Schnor and M. Sch{\"u}tte},
+ Date-Added = {2008-05-09 11:26:03 +0200},
+ Date-Modified = {2008-05-29 20:11:22 +0200},
+ Institution = {University of Potsdam},
+ Month = may,
+ Title = {{Experiences with a Resilient, MPI-based Master-Worker Application in a Failure-Prone Grid Environment}},
+ Year = {2008}}
+
+ at inproceedings{Luckow:2008hq,
+ Address = {Boston, USA},
+ Author = {A. Luckow and B. Schnor},
+ Booktitle = {7th IEEE International Symposium on Network Computing and Applications},
+ Date-Added = {2008-05-09 11:24:59 +0200},
+ Date-Modified = {2008-05-24 18:55:55 +0200},
+ Title = {Adaptive Checkpoint Replication for Supporting the Fault Tolerance of Applications in the Grid},
+ Year = {2008}}
+
+ at inproceedings{bieker94reconfiguration,
+ Author = {B. Bieker and E. Maehle and G. Deconinck and J. Vounckx},
+ Booktitle = {European Dependable Computing Conference},
+ Date-Added = {2008-05-07 13:12:42 +0200},
+ Date-Modified = {2008-06-28 16:22:14 +0200},
+ Pages = {353--370},
+ Title = {{Reconfiguration and Checkpointing in Massively Parallel Systems}},
+ Url = {citeseer.ist.psu.edu/bieker94reconfiguration.html},
+ Year = {1994},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/bieker94reconfiguration.html}}
+
+ at inproceedings{PSL96,
+ Address = {Universit{\"a}t Z{\"u}rich, Institut f{\"u}r Informatik},
+ Author = {S. Petri and B. Schnor and H. Langend{\"o}rfer},
+ Booktitle = {Proceedings of SIWORK'96},
+ Date-Added = {2008-05-07 13:00:56 +0200},
+ Date-Modified = {2008-05-24 18:56:46 +0200},
+ Editor = {Clemens H. Cap},
+ Isbn = {3-7281-2342-0},
+ Month = may,
+ Optbooktitle = {Proceedings of SIWORK'96},
+ Optorganization = {Universit{\"a}t Z{\"u}rich},
+ Pages = {91--102},
+ Publisher = {vdf Hochschulverlag AG an der ETH Z{\"u}rich},
+ Reviewed = {yes},
+ Title = {{PBeam~-- Fehlertoleranz f\"ur verteilte Anwendungen mittels Migration und Checkpointing}},
+ Year = {1996}}
+
+ at inproceedings{1267905,
+ Address = {Berkeley, CA, USA},
+ Author = {Eduardo Pinheiro and Wolf-Dietrich Weber and Luiz Andr{\'e} Barroso},
+ Booktitle = {FAST '07: Proceedings of the 5th USENIX conference on File and Storage Technologies},
+ Date-Added = {2008-05-06 16:09:00 +0200},
+ Date-Modified = {2008-05-06 16:09:39 +0200},
+ Location = {San Jose, CA},
+ Pages = {2--2},
+ Publisher = {USENIX Association},
+ Title = {{Failure Trends in a Large Disk Drive Population}},
+ Year = {2007}}
+
+ at article{1458051,
+ Author = {J. Gray and M. Anderton},
+ Date-Added = {2008-05-06 15:50:58 +0200},
+ Date-Modified = {2008-05-24 18:53:31 +0200},
+ Issn = {0018-9219},
+ Journal = {Proceedings of the IEEE},
+ Number = {5},
+ Pages = {719--726},
+ Title = {{Distributed Computer Systems: Four Case Studies}},
+ Volume = {75},
+ Month = may,
+ Year = {1987}}
+
+ at article{357371,
+ Address = {New York, NY, USA},
+ Author = {R. Schlichting and F. Schneider},
+ Date-Added = {2008-05-06 13:55:32 +0200},
+ Date-Modified = {2008-06-19 22:49:42 +0200},
+ Doi = {http://doi.acm.org/10.1145/357369.357371},
+ Issn = {0734-2071},
+ Journal = {ACM Transactions on Computer Systems},
+ Number = {3},
+ Pages = {222--238},
+ Publisher = {ACM},
+ Title = {{Fail-Stop Processors: An Approach to Designing Fault-Tolerant Computing Systems}},
+ Volume = {1},
+ Year = {1983},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/357369.357371}}
+
+ at article{1312787,
+ Address = {Washington, DC, USA},
+ Author = {D. A. Rennels},
+ Date-Added = {2008-05-06 13:15:48 +0200},
+ Date-Modified = {2008-05-06 13:15:48 +0200},
+ Doi = {http://dx.doi.org/10.1109/TC.1984.1676390},
+ Issn = {0018-9340},
+ Journal = {IEEE Trans. Comput.},
+ Number = {12},
+ Pages = {1116--1129},
+ Publisher = {IEEE Computer Society},
+ Title = {Fault-Tolerant Computing Concepts and Examples},
+ Volume = {33},
+ Year = {1984},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/TC.1984.1676390}}
+
+ at inproceedings{875631,
+ Address = {Washington, DC, USA},
+ Author = {Nick Kolettis and N. Dudley Fulton},
+ Booktitle = {FTCS '95: Proceedings of the Twenty-Fifth International Symposium on Fault-Tolerant Computing},
+ Date-Added = {2008-05-06 12:57:01 +0200},
+ Date-Modified = {2008-05-06 12:57:10 +0200},
+ Pages = {381},
+ Publisher = {IEEE Computer Society},
+ Title = {{Software Rejuvenation: Analysis, Module and Applications}},
+ Year = {1995}}
+
+ at book{ieee_90,
+ Author = {{The Institute of Electrical and Electronics Engineers (IEEE)}},
+ Date-Added = {2008-05-06 11:27:36 +0200},
+ Date-Modified = {2008-05-06 11:28:33 +0200},
+ Posted-At = {2006-01-30 15:35:50},
+ Priority = {2},
+ Title = {IEEE 90: IEEE Standard Glossary of Software Engineering Terminology},
+ Year = {1990}}
+
+ at inproceedings{808467,
+ Address = {New York, NY, USA},
+ Author = {B. Randell},
+ Booktitle = {Proceedings of the international conference on Reliable software},
+ Date-Added = {2008-05-06 10:05:07 +0200},
+ Date-Modified = {2008-05-06 10:05:25 +0200},
+ Doi = {http://doi.acm.org/10.1145/800027.808467},
+ Location = {Los Angeles, California},
+ Pages = {437--449},
+ Publisher = {ACM},
+ Title = {{System Structure for Software Fault Tolerance}},
+ Year = {1975},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/800027.808467}}
+
+ at article{1083803,
+ Address = {New York, NY, USA},
+ Author = {Marianne Winslett},
+ Date-Added = {2008-05-05 23:15:21 +0200},
+ Date-Modified = {2008-05-06 16:38:28 +0200},
+ Doi = {http://doi.acm.org/10.1145/1083784.1083803},
+ Issn = {0163-5808},
+ Journal = {SIGMOD Rec.},
+ Number = {2},
+ Pages = {71--79},
+ Publisher = {ACM},
+ Title = {{Bruce Lindsay Speaks Out}},
+ Volume = {34},
+ Year = {2005},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/1083784.1083803}}
+
+ at article{102801,
+ Address = {New York, NY, USA},
+ Author = {Flaviu Cristian},
+ Date-Added = {2008-05-05 22:23:53 +0200},
+ Date-Modified = {2008-05-19 18:18:20 +0200},
+ Doi = {http://doi.acm.org/10.1145/102792.102801},
+ Issn = {0001-0782},
+ Journal = {Communications of the ACM},
+ Number = {2},
+ Pages = {56--78},
+ Publisher = {ACM},
+ Title = {{Understanding Fault-Tolerant Distributed Systems}},
+ Volume = {34},
+ Year = {1991},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/102792.102801}}
+
+ at article{214121,
+ Address = {New York, NY, USA},
+ Author = {Michael J. Fischer and Nancy A. Lynch and Michael S. Paterson},
+ Date-Added = {2008-05-05 21:46:28 +0200},
+ Date-Modified = {2008-05-05 21:46:47 +0200},
+ Doi = {http://doi.acm.org/10.1145/3149.214121},
+ Issn = {0004-5411},
+ Journal = {Journal of the ACM},
+ Number = {2},
+ Pages = {374--382},
+ Publisher = {ACM},
+ Title = {{Impossibility of Distributed Consensus with one Faulty Process}},
+ Volume = {32},
+ Year = {1985},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/3149.214121}}
+
+ at book{236297,
+ Address = {Cambridge, MA, USA},
+ Author = {Eric S. Raymond},
+ Date-Added = {2008-05-05 13:00:03 +0200},
+ Date-Modified = {2008-05-05 13:00:24 +0200},
+ Edition = {Third},
+ Isbn = {0-262-68092-0},
+ Publisher = {MIT Press},
+ Title = {{The New Hacker's Dictionary}},
+ Year = {1996}}
+
+ at article{10.1109/MC.2007.55,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Michael Grottke and Kishor S. Trivedi},
+ Date-Added = {2008-05-05 11:44:17 +0200},
+ Date-Modified = {2008-05-05 11:44:17 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/MC.2007.55},
+ Issn = {0018-9162},
+ Journal = {Computer},
+ Number = {2},
+ Pages = {107--109},
+ Publisher = {IEEE Computer Society},
+ Title = {Fighting Bugs: Remove, Retry, Replicate, and Rejuvenate},
+ Volume = {40},
+ Year = {2007},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/MC.2007.55}}
+
+ at inproceedings{gray86why,
+ Author = {J. Gray},
+ Booktitle = {Symposium on Reliability in Distributed Software and Database Systems},
+ Date-Added = {2008-05-05 11:33:04 +0200},
+ Date-Modified = {2008-05-24 18:53:46 +0200},
+ Keywords = {Cluster Computing},
+ Pages = {3--12},
+ Title = {{Why Do Computers Stop and What Can Be Done About It?}},
+ Url = {citeseer.ist.psu.edu/gray85why.html},
+ Year = {1986},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/gray85why.html}}
+
+ at article{42283,
+ Address = {New York, NY, USA},
+ Author = {Cynthia Dwork and Nancy Lynch and Larry Stockmeyer},
+ Date-Added = {2008-05-04 18:31:01 +0200},
+ Date-Modified = {2008-05-18 21:28:02 +0200},
+ Doi = {http://doi.acm.org/10.1145/42282.42283},
+ Issn = {0004-5411},
+ Journal = {Journal of the ACM},
+ Number = {2},
+ Pages = {288--323},
+ Publisher = {ACM},
+ Title = {{Consensus in the Presence of Partial Synchrony}},
+ Volume = {35},
+ Year = {1988},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/42282.42283}}
+
+ at inproceedings{pruyne96managing,
+ Author = {Jim Pruyne and Miron Livny},
+ Booktitle = {Job Scheduling Strategies for Parallel Processing, {IPPS}'96 Workshop},
+ Date-Added = {2008-05-03 18:14:17 +0200},
+ Date-Modified = {2008-05-03 18:14:17 +0200},
+ Editor = {Dror G. Feitelson and Larry Rudolph},
+ Pages = {140--154},
+ Publisher = {Springer},
+ Title = {{Managing Checkpoints for Parallel Programs}},
+ Url = {citeseer.ist.psu.edu/85290.html},
+ Volume = {1162},
+ Year = {1996},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/85290.html}}
+
+ at article{Sankaran:2005it,
+ Author = {Sriram Sankaran and Jeffrey M. Squyres and Brian Barrett and Andrew Lumsdaine and Jason Duell and Paul Hargrove and Eric Roman},
+ Date-Added = {2008-05-03 14:29:02 +0200},
+ Date-Modified = {2008-05-08 18:53:04 +0200},
+ Journal = {International Journal of High Performance Computing Applications},
+ Keywords = {MPI, checkpoint/restart, rollback-recovery},
+ Month = {Winter},
+ Number = 4,
+ Pages = {479--493},
+ Title = {{The LAM/MPI Checkpoint/Restart Framework: System-Initiated Checkpointing}},
+ Volume = 19,
+ Year = 2005}
+
+ at article{322188,
+ Address = {New York, NY, USA},
+ Author = {M. Pease and R. Shostak and L. Lamport},
+ Date-Added = {2008-05-03 12:22:25 +0200},
+ Date-Modified = {2008-06-28 11:58:03 +0200},
+ Doi = {http://doi.acm.org/10.1145/322186.322188},
+ Issn = {0004-5411},
+ Journal = {Journal of the ACM},
+ Number = {2},
+ Pages = {228--234},
+ Publisher = {ACM},
+ Title = {{Reaching Agreement in the Presence of Faults}},
+ Volume = {27},
+ Year = {1980},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/322186.322188}}
+
+ at inproceedings{781513,
+ Address = {New York, NY, USA},
+ Author = {Greg Bronevetsky and Daniel Marques and Keshav Pingali and Paul Stodghill},
+ Booktitle = {PPoPP '03: Proceedings of the ninth ACM SIGPLAN symposium on Principles and practice of parallel programming},
+ Date-Added = {2008-05-03 11:27:10 +0200},
+ Date-Modified = {2008-05-03 11:28:21 +0200},
+ Doi = {http://doi.acm.org/10.1145/781498.781513},
+ Isbn = {1-58113-588-2},
+ Location = {San Diego, California, USA},
+ Pages = {84--94},
+ Publisher = {ACM},
+ Title = {{Automated Application-Level Checkpointing of MPI Programs}},
+ Year = {2003},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/781498.781513}}
+
+ at inproceedings{1006248,
+ Address = {New York, NY, USA},
+ Author = {S. Agarwal and R. Garg and M. Gupta and J. Moreira},
+ Booktitle = {ICS '04: Proceedings of the 18th annual international conference on Supercomputing},
+ Date-Added = {2008-05-03 10:58:31 +0200},
+ Date-Modified = {2008-06-19 09:15:54 +0200},
+ Doi = {http://doi.acm.org/10.1145/1006209.1006248},
+ Isbn = {1-58113-839-3},
+ Location = {Malo, France},
+ Pages = {277--286},
+ Publisher = {ACM},
+ Title = {{Adaptive Incremental Checkpointing for Massively Parallel Systems}},
+ Year = {2004},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/1006209.1006248}}
+
+ at article{DBLP:journals/cluster/AgbariaF03,
+ Author = {Adnan Agbaria and Roy Friedman},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-04-07 22:08:27 -0400},
+ Date-Modified = {2008-04-07 22:09:45 -0400},
+ Ee = {http://dx.doi.org/10.1023/A:1023540604208},
+ Journal = {Cluster Computing},
+ Number = {3},
+ Pages = {227--236},
+ Title = {{Starfish: Fault-Tolerant Dynamic MPI Programs on Clusters of Workstations}},
+ Volume = {6},
+ Year = {2003}}
+
+ at article{DBLP:journals/pik/RombergE01,
+ Author = {Mathilde Romberg and Dietmar W. Erwin},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-04-04 15:17:44 +0200},
+ Date-Modified = {2008-04-04 15:17:44 +0200},
+ Journal = {Praxis der Informationsverarbeitung und Kommunikation},
+ Number = {2},
+ Title = {{UNICORE} -- Uniformes Interface f{\"u}r Computer-Ressourcen},
+ Volume = {24},
+ Year = {2001}}
+
+ at book{Mattson:2004fu,
+ Author = {Timothy Mattson and Beverly Sanders and Berna Massingill},
+ Date-Added = {2008-03-29 19:06:01 +0100},
+ Date-Modified = {2008-03-29 19:07:16 +0100},
+ Publisher = {Pearson Education Inc.},
+ Title = {Patterns For Parallel Programming},
+ Year = {2004}}
+
+ at misc{Badia:2007zp,
+ Author = {R. Badia and R. Hood and T. Kielmann and C. Morin and S. Pickles and P. Stodghill and N. Stone and H. Yeom},
+ Date-Added = {2008-03-29 18:16:06 +0100},
+ Date-Modified = {2008-05-25 13:48:25 +0200},
+ Editor = {A. Merzky and P. Stodghill},
+ Howpublished = {Grid Forum Document GFD.92},
+ Month = {May},
+ Note = {Open Grid Forum},
+ Title = {{Use-Cases and Requirements for Grid Checkpoint and Recovery}},
+ Year = 2007}
+
+ at techreport{Vaidya94b,
+ Abstract = {{This report deals with the design and evaluation of a
+ ``two-level'' failure recovery scheme for distributed systems. In
+ our previous work we motivated a ``two-level'' recovery approach that
+ tolerates the more probable failures with a low overhead, and less
+ probable failures with possibly higher overhead. The two-level
+ approach can achieve a smaller overhead as compared to traditional
+ recovery schemes. The contributions of this report are summarized
+ below:
+ \begin{itemize}
+ \item We present and evaluate a ``two-level'' recovery scheme that is
+ suitable for a network of workstations, each workstation having a
+ local disk. The recovery scheme presented in the report can
+ tolerate transient processor failures with a low overhead, while
+ other failures require a larger overhead. The report presents
+ analysis of the average (expected) task completion time using the
+ proposed scheme. This scheme has been implemented on a
+ workstation cluster. Our analysis indicates that the proposed
+ two-level recovery scheme can achieve better performance as
+ compared to existing ``one-level'' recovery schemes.
+ \item The report also evaluates the impact of checkpoint latency on the
+ performance of the recovery scheme. To our knowledge, no analysis
+ of the performance impact of checkpoint latency has been carried
+ out previously.
+ \item Experimental measurements of checkpoint latency and checkpoint
+ overhead for four applications are presented.
+ \end{itemize}
+}},
+ Address = {College Station, TX 77843-3112},
+ Author = {N. Vaidya},
+ Contact-Url = {mailto:vaidya at cs.tamu.edu},
+ Date-Added = {2008-03-28 23:06:17 +0100},
+ Date-Modified = {2008-06-22 18:27:29 +0200},
+ Institution = {Texas A\&M University},
+ Month = dec,
+ Number = {94-068},
+ Title = {Another Two-Level Failure Recovery Scheme: Performance Impact of Checkpoint Placement and Checkpoint Latency},
+ Url = {ftp://ftp.cs.tamu.edu/pub/vaidya/fault-tolerance/94-068.ps.Z},
+ Year = {1994},
+ Bdsk-Url-1 = {ftp://ftp.cs.tamu.edu/pub/vaidya/fault-tolerance/94-068.ps.Z}}
+
+ at techreport{DeVoCuLa93,
+ Address = {Belgium},
+ Author = {Geert Deconinck and Johan Vounckx and Rudi Cuyvers and Rudy Lauwereins},
+ Date-Added = {2008-03-28 23:06:04 +0100},
+ Date-Modified = {2008-03-28 23:25:24 +0100},
+ Institution = {Katholieke Universiteit Leuven},
+ Title = {{Survey of Checkpointing and Rollback Techniques}},
+ Type = {Technical Report},
+ Year = {1993}}
+
+ at inproceedings{Pruyne96,
+ Author = {Jim Pruyne and Miron Livny},
+ Booktitle = {Job Scheduling Strategies for Parallel Processing, IPPS'96 Workshop},
+ Crossref = {IPPS96},
+ Date-Added = {2008-03-28 23:05:51 +0100},
+ Date-Modified = {2008-03-28 23:05:51 +0100},
+ Editor = {Dror G. Feitelson and Larry Rudolph},
+ Month = apr,
+ Pages = {140--154},
+ Publisher = {Springer},
+ Series = lncs,
+ Title = {{Managing Checkpoints for Parallel Programs}},
+ Volume = {1162},
+ Year = {1996}}
+
+ at misc{Globus-Alliance:2007dz,
+ Author = {{Globus Alliance}},
+ Date-Added = {2008-03-28 20:49:25 +0100},
+ Date-Modified = {2008-03-28 23:39:09 +0100},
+ Howpublished = {\url{http://www.globus.org/toolkit/docs/4.0/techpreview/datarep/}},
+ Title = {{Data Replication Service (DRS)}},
+ Year = {2008}}
+
+ at inproceedings{cite-key,
+ Date-Added = {2008-03-28 20:49:05 +0100},
+ Date-Modified = {2008-03-28 20:49:05 +0100}}
+
+ at article{Bieker:1997yq,
+ Address = {West Sussex, UK},
+ Author = {Bieker, B. and Deconinck, G. and Maehle, E. and Vounckx, J.},
+ Journal = {International Journal of Computer Science, Systems \& Engineering},
+ Date-Added = {2008-03-28 16:15:34 +0100},
+ Date-Modified = {2008-03-28 23:26:18 +0100},
+ Organization = {CRL Publishing Ltd.},
+ Pages = {245--254},
+ Title = {{Fault-Tolerant Routing, Reconfiguration and Backward Error Recovery for Parallel Systems}},
+ Volume = {12},
+ Year = {1997}}
+
+ at proceedings{ftpds92,
+ Address = {Amherst, MA},
+ Date-Added = {2008-03-28 16:11:47 +0100},
+ Date-Modified = {2008-03-28 16:11:47 +0100},
+ Editor = {Fussell, Donald S. and Jha, Niraj},
+ Isbn = {0-8186-2870-7},
+ Month = jul,
+ Publisher = {IEEE Computer Society Press},
+ Title = {Proceedings of the {IEEE} Workshop on Fault-Tolerant Parallel and Distributed Systems},
+ Year = {1992}}
+
+ at inproceedings{BaBiMa92,
+ Address = {Amherst, MA},
+ Author = {A. Bauch and B. Bieker and E. Maehle},
+ Booktitle = {Workshop on Fault-Tolerant Parallel and Distributed Systems},
+ Date-Added = {2008-03-28 16:11:18 +0100},
+ Date-Modified = {2008-03-28 16:11:18 +0100},
+ From = {belg Survey on ... Based on Checkpointing and Rollback (kurz)},
+ Month = jul,
+ Owner = {?},
+ Pages = {36--43},
+ Title = {{Backward Error Recovery in the Dynamical Reconfigurable Multiprocessor System {DAMP}}},
+ Year = {1992}}
+
+ at misc{Allen:2003xy,
+ Author = {G. Allen and C. MacMahon and E. Seidel and T. Tierney},
+ Date-Added = {2008-03-28 16:11:04 +0100},
+ Date-Modified = {2008-06-19 09:11:41 +0200},
+ Howpublished = {\url{http://www.cct.lsu.edu/~gallen/Reports/LONI_ConceptPaper.pdf}},
+ Title = {{LONI Concept Paper}},
+ Year = {2003}}
+
+ at inproceedings{1339244,
+ Address = {Washington, DC, USA},
+ Author = {Adrian Colesa and Teodor Pop and Iosif Ignat and Cosmin Ardelean},
+ Booktitle = {SYNASC '07: Proceedings of the Ninth International Symposium on Symbolic and Numeric Algorithms for Scientific Computing},
+ Date-Added = {2008-03-28 08:16:40 +0100},
+ Date-Modified = {2008-03-28 22:34:25 +0100},
+ Doi = {http://dx.doi.org/10.1109/SYNASC.2007.69},
+ Isbn = {0-7695-3078-8},
+ Pages = {310--316},
+ Publisher = {IEEE Computer Society},
+ Title = {{Automatic and Reliable Distribution of Data in Grids over Globus Toolkit}},
+ Year = {2007},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/SYNASC.2007.69}}
+
+ at inproceedings{652840,
+ Address = {London, UK},
+ Author = {K. Ranganathan and I. Foster},
+ Booktitle = {GRID '01: Proceedings of the Second International Workshop on Grid Computing},
+ Date-Added = {2008-03-26 20:21:13 +0100},
+ Date-Modified = {2008-06-30 19:46:16 +0200},
+ Isbn = {3-540-42949-2},
+ Pages = {75--86},
+ Publisher = {Springer-Verlag},
+ Title = {{Identifying Dynamic Replication Strategies for a High-Performance Data Grid}},
+ Year = {2001}}
+
+ at inproceedings{Allcock:2004kh,
+ Author = {W. Allcock and I. Foster and R. Madduri},
+ Booktitle = {Building Service Based Grids Workshop},
+ Date-Added = {2008-03-26 17:26:29 +0100},
+ Date-Modified = {2008-06-19 09:13:49 +0200},
+ Howpublished = {\url{http://www.globus.org/alliance/publications/papers/GGF11_RFTV-Final.pdf}},
+ Title = {{Reliable Data Transport: A Critical Service for the Grid}},
+ Year = {2004},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/700746.html}}
+
+ at inproceedings{Jeske:2007wj,
+ Address = {Baden-Baden, Germany},
+ Author = {J. Jeske and A. Luckow and B. Schnor},
+ Booktitle = {Proceedings of German e-Science Conference},
+ Date-Added = {2008-03-26 16:03:16 +0100},
+ Date-Modified = {2008-05-24 18:55:18 +0200},
+ Title = {{Reservation-based Resource-Brokering for Grid Computing}},
+ Year = {2007}}
+
+ at inproceedings{762798,
+ Address = {Los Alamitos, CA, USA},
+ Author = {A. Chervenak and E. Deelman and I. Foster and L. Guy and W. Hoschek and A. Iamnitchi and C. Kesselman and P. Kunszt and M. Ripeanu and B. Schwartzkopf and H. Stockinger and K. Stockinger and B. Tierney},
+ Booktitle = {Supercomputing '02: Proceedings of the 2002 ACM/IEEE conference on Supercomputing},
+ Date-Added = {2008-03-26 10:20:06 +0100},
+ Date-Modified = {2008-06-30 19:44:44 +0200},
+ Location = {Baltimore, Maryland},
+ Pages = {1--17},
+ Publisher = {IEEE Computer Society Press},
+ Title = {{Giggle: A Framework for Constructing Scalable Replica Location Services}},
+ Year = {2002}}
+
+ at inproceedings{1252273,
+ Address = {Washington, DC, USA},
+ Author = {P. Tr{\"o}ger and H. Rajic and A. Haas and P. Domagalski},
+ Booktitle = {CCGRID '07: Proceedings of the Seventh IEEE International Symposium on Cluster Computing and the Grid},
+ Date-Added = {2008-03-24 17:25:00 +0100},
+ Date-Modified = {2008-06-22 18:11:47 +0200},
+ Doi = {http://dx.doi.org/10.1109/CCGRID.2007.109},
+ Isbn = {0-7695-2833-3},
+ Pages = {619--626},
+ Publisher = {IEEE Computer Society},
+ Title = {{Standardization of an API for Distributed Resource Management Systems}},
+ Year = {2007},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/CCGRID.2007.109}}
+
+ at misc{saga-cpr,
+ Author = {A. Merzky},
+ Date-Added = {2008-03-23 21:38:12 +0100},
+ Date-Modified = {2008-05-25 13:50:02 +0200},
+ Title = {{SAGA CPR Draft}},
+ Year = 2008}
+
+ at inproceedings{Kaiser:2006qp,
+ Address = {Portland, OR, USA},
+ Author = {Kaiser, H. and Merzky, A. and Hirmer, S. and Allen, G.},
+ Booktitle = {{Object-Oriented Programming, Systems, Languages and Applications (OOPSLA'06) - Library-Centric Software Design (LCSD'06)}},
+ Date-Added = {2008-03-23 21:22:46 +0100},
+ Date-Modified = {2008-05-25 13:49:44 +0200},
+ Howpublished = {\url{http://saga.cct.lsu.edu/publications/saga_paper-the_saga_cpp_reference_implementation_oopsla06.pdf}},
+ Month = {October, 22-26},
+ Title = {{The SAGA C++ Reference Implementation}},
+ Year = {2006}}
+
+ at book{citeulike:2086185,
+ Abstract = {{An eye-opening look at the new computer revolution and the coming transformation of our economy, society, and culture. A hundred years ago, companies stopped producing their own power with steam engines and generators and plugged into the newly built electric grid. The cheap power pumped out by electric utilities not only changed how businesses operated but also brought the modern world into existence. Today a similar revolution is under way. Companies are dismantling their private computer systems and tapping into rich services delivered over the Internet. This time it's computing that's turning into a utility. The shift is already remaking the computer industry, bringing new competitors like Google to the fore and threatening traditional stalwarts like Microsoft and Dell. But the effects will reach much further. Cheap computing will ultimately change society as profoundly as cheap electricity did. In this lucid and compelling book, Nicholas Carr weaves together history, economics, and technology to explain why computing is changing---and what it means for all of us.}},
+ Author = {Carr, N. },
+ Citeulike-Article-Id = {2086185},
+ Date-Added = {2008-03-23 14:34:54 +0100},
+ Date-Modified = {2008-06-22 18:27:20 +0200},
+ Howpublished = {Hardcover},
+ Isbn = {0393062287},
+ Keywords = {cloud, computing, future, grid, network},
+ Month = {January},
+ Priority = {3},
+ Publisher = {{W. W. Norton}},
+ Title = {The Big Switch: Rewiring the World, from Edison to Google},
+ Url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0393062287},
+ Year = {2008},
+ Bdsk-Url-1 = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20%5C&path=ASIN/0393062287}}
+
+ at article{RePEc:aea:aecrev:v:80:y:1990:i:2:p:355-61,
+ Author = {David, Paul A},
+ Date-Added = {2008-03-23 14:33:30 +0100},
+ Date-Modified = {2008-03-23 14:33:30 +0100},
+ Journal = {American Economic Review},
+ Month = {May},
+ Number = {2},
+ Pages = {355--361},
+ Title = {The Dynamo and the Computer: An Historical Perspective on the Modern Productivity Paradox},
+ Url = {http://ideas.repec.org/a/aea/aecrev/v80y1990i2p355-61.html},
+ Volume = {80},
+ Year = 1990,
+ Bdsk-Url-1 = {http://ideas.repec.org/a/aea/aecrev/v80y1990i2p355-61.html}}
+
+ at article{615952,
+ Address = {Piscataway, NJ, USA},
+ Author = {M. Chetty and R. Buyya},
+ Date-Added = {2008-03-23 14:31:50 +0100},
+ Date-Modified = {2008-06-22 18:29:08 +0200},
+ Doi = {http://dx.doi.org/10.1109/MCISE.2002.1014981},
+ Issn = {1521-9615},
+ Journal = {Computing in Science and Engineering},
+ Number = {4},
+ Pages = {61--71},
+ Publisher = {IEEE Educational Activities Department},
+ Title = {{Weaving Computational Grids: How Analogous Are They with Electrical Grids?}},
+ Volume = {4},
+ Year = {2002},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/MCISE.2002.1014981}}
+
+ at article{Parashar:2005wd,
+ Author = {M. Parashar and J. C. Browne},
+ Journal = {Proceedings of the IEEE, Special Issue on Grid Computing},
+ Date-Added = {2008-03-23 13:51:15 +0100},
+ Date-Modified = {2008-03-23 13:56:15 +0100},
+ Month = {March},
+ Number = {3},
+ Pages = {653--668},
+ Publisher = {IEEE Press},
+ Title = {Conceptual and Implementation Models for the Grid},
+ Volume = {93},
+ Year = {2005}}
+
+ at misc{Parashar:2008gf,
+ Author = {Manish Parashar},
+ Date-Added = {2008-03-23 11:32:05 +0100},
+ Date-Modified = {2008-05-20 09:13:59 +0200},
+ Howpublished = {\url{http://www.mardigrasconference.org/slides/dpa-parashar.pdf}},
+ Title = {{Autonomics for Computational Science \& Engineering}},
+ Year = {2008}}
+
+ at inproceedings{DBLP:conf/eagc/KovacsK04,
+ Author = {J. Kov{\'a}cs and P. Kacsuk},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {European Across Grids Conference},
+ Crossref = {DBLP:conf/eagc/2004},
+ Date-Added = {2008-03-14 11:15:00 -0500},
+ Date-Modified = {2008-06-22 22:34:15 +0200},
+ Ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3165{\&}spage=80},
+ Pages = {80--89},
+ Title = {{A Migration Framework for Executing Parallel Programs in the Grid}},
+ Year = {2004}}
+
+ at proceedings{DBLP:conf/eagc/2004,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {European Across Grids Conference},
+ Date-Added = {2008-03-14 11:15:00 -0500},
+ Date-Modified = {2008-03-14 11:15:00 -0500},
+ Editor = {Marios D. Dikaiakos},
+ Isbn = {3-540-22888-8},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Grid Computing, Second European Across Grids Conference, AxGrids 2004, Nicosia, Cyprus, January 28-30, 2004, Revised Papers},
+ Volume = {3165},
+ Year = {2004}}
+
+ at article{945906,
+ Address = {Seattle, WA, USA},
+ Author = {I. Haddad and C. Leangsuksun and S. Scott},
+ Date-Added = {2008-03-06 22:23:22 -0600},
+ Date-Modified = {2008-05-28 22:32:52 +0200},
+ Issn = {1075-3583},
+ Journal = {Linux Journal},
+ Number = {115},
+ Pages = {1},
+ Publisher = {Specialized Systems Consultants, Inc.},
+ Title = {{HA-OSCAR: The Birth of Highly Available OSCAR}},
+ Volume = {2003},
+ Year = {2003}}
+
+ at misc{dagman,
+ Author = {Dagman},
+ Howpublished = {\url{http://www.cs.wisc.edu/condor/manual/v7.0/condor-V7_0_1-Manual.pdf}},
+ Citeulike-Article-Id = {1025156},
+ Date-Added = {2008-03-06 20:39:31 -0600},
+ Date-Modified = {2008-03-06 20:44:19 -0600},
+ Keywords = {bibtex-import},
+ Priority = {2},
+ Title = {{Directed Acyclic Graph Manager}},
+ Year = {2008}}
+
+ at inproceedings{conf/IEEEscc/ZhaoHCFLNRSW07,
+ Author = {Y. Zhao and M. Hategan and B. Clifford and I. Foster and G. von Laszewski and V. Nefedova and I. Raicu and T. Stef-Praun and M. Wilde},
+ Booktitle = {IEEE SCW},
+ Date = {2007-07-13},
+ Date-Added = {2008-03-06 17:39:40 -0600},
+ Date-Modified = {2008-06-30 19:43:50 +0200},
+ Description = {dblp},
+ Ee = {http://doi.ieeecomputersociety.org/10.1109/SERVICES.2007.63},
+ Keywords = {dblp },
+ Pages = {199--206},
+ Publisher = {IEEE Computer Society},
+ Title = {Swift: Fast, Reliable, Loosely Coupled Parallel Computation},
+ Url = {http://dblp.uni-trier.de/db/conf/IEEEscc/scw2007.html#ZhaoHCFLNRSW07},
+ Year = {2007},
+ Bdsk-Url-1 = {http://dblp.uni-trier.de/db/conf/IEEEscc/scw2007.html#ZhaoHCFLNRSW07}}
+
+ at article{1125406,
+ Address = {Amsterdam, The Netherlands},
+ Author = {Falk Neubauer and Andreas Hoheisel and Joachim Geiler},
+ Date-Added = {2008-03-06 17:37:31 -0600},
+ Date-Modified = {2008-05-20 09:13:11 +0200},
+ Doi = {http://dx.doi.org/10.1016/j.future.2005.08.002},
+ Issn = {0167-739X},
+ Journal = {Future Generation Computer Systems -- The International Journal of Grid Computing: Theory, Methods and Application},
+ Number = {1},
+ Pages = {6--15},
+ Publisher = {Elsevier Science Publishers B. V.},
+ Title = {{Workflow-based Grid Applications}},
+ Volume = {22},
+ Year = {2006},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.future.2005.08.002}}
+
+ at article{1239653,
+ Address = {Amsterdam, The Netherlands},
+ Author = {E. Deelman and G. Singh and M.-H. Su and J. Blythe and Y. Gil and C. Kesselman and G. Mehta and K. Vahi and G. Berriman and J. Good and A. Laity and J. Jacob and D. Katz},
+ Date-Added = {2008-03-06 16:57:46 -0600},
+ Date-Modified = {2008-06-22 17:27:52 +0200},
+ Issn = {1058-9244},
+ Journal = {Sci. Program.},
+ Number = {3},
+ Pages = {219--237},
+ Publisher = {IOS Press},
+ Title = {{Pegasus: A Framework for Mapping Complex Scientific Workflows onto Distributed Systems}},
+ Volume = {13},
+ Year = {2005}}
+
+ at article{journals/corr/cs-DC-0206040,
+ Author = {N. Karonis and B. Toonen and I. Foster},
+ Biburl = {http://www.bibsonomy.org/bibtex/2ed353efc8b0d5acc7bc9028982c7ee65/dblp},
+ Date = {2008-01-02},
+ Date-Added = {2008-03-06 16:44:37 -0600},
+ Date-Modified = {2008-06-30 19:48:08 +0200},
+ Description = {dblp},
+ Ee = {http://arxiv.org/abs/cs.DC/0206040},
+ Journal = {CoRR},
+ Keywords = {dblp },
+ Note = {informal publication},
+ Title = {{MPICH-G2: A Grid-Enabled Implementation of the Message Passing Interface}},
+ Url = {http://dblp.uni-trier.de/db/journals/corr/corr0206.html#cs-DC-0206040},
+ Volume = {cs.DC/0206040},
+ Year = {2002},
+ Bdsk-Url-1 = {http://dblp.uni-trier.de/db/journals/corr/corr0206.html#cs-DC-0206040}}
+
+ at unpublished{ES-Schnetter2008b,
+ Author = {E. Schnetter},
+ Date-Added = {2008-03-03 16:40:57 -0600},
+ Date-Modified = {2008-06-16 10:37:08 +0200},
+ Month = {January},
+ Note = {Invited talk given at the Distributed Programming Abstractions Workshop of the 15th Mardi Gras Conference in Baton Rouge, LA},
+ Pdf = {doc/BatonRouge-2008-MardiGras-CactusDistributed.pdf},
+ Title = {Cactus Concepts for Distributed HPC Applications},
+ Url = {http://www.mardigrasconference.org/},
+ Year = 2008,
+ Bdsk-Url-1 = {http://www.mardigrasconference.org/}}
+
+ at unpublished{ES-Schnetter2008c,
+ Author = {E. Schnetter},
+ Date-Added = {2008-03-03 16:40:34 -0600},
+ Date-Modified = {2008-06-22 17:47:07 +0200},
+ Month = {February},
+ Note = {Talk given at the 15th Mardi Gras Conference in Baton Rouge, LA},
+ Pdf = {doc/BatonRouge-2008-MardiGras-GammaRayBursts.pdf},
+ Title = {{A Case Study for Petascale Applications in Astrophysics: Simulating Gamma-Ray Bursts}},
+ Url = {http://www.mardigrasconference.org/},
+ Year = 2008,
+ Bdsk-Url-1 = {http://www.mardigrasconference.org/}}
+
+ at unpublished{ES-Schnetter2007d,
+ Author = {E. Schnetter},
+ Date-Added = {2008-03-03 16:40:08 -0600},
+ Date-Modified = {2008-06-16 10:37:01 +0200},
+ Month = {October},
+ Note = {Talk given to LSU relativity group in Baton Rouge, LA},
+ Pdf = {doc/BatonRouge-2007-BBHFactory.pdf},
+ Title = {{The {BBH} Factory: {Herding} Simulations}},
+ Url = {http://relativity.phys.lsu.edu/},
+ Year = 2007,
+ Bdsk-Url-1 = {http://relativity.phys.lsu.edu/}}
+
+ at misc{Konig:2007hc,
+ Author = {Arne K{\"o}nig and Max Ostrowski and G{\"u}nther Nie{\ss}},
+ Date-Added = {2008-02-28 16:00:53 -0600},
+ Date-Modified = {2008-05-20 09:10:45 +0200},
+ Title = {{Graphentheoretische Berechnung f\"ur Bioinformatische Fragestellungen}},
+ Year = {2007}}
+
+ at misc{openmp:nx,
+ Date-Added = {2008-02-28 14:30:46 -0600},
+ Date-Modified = {2008-02-28 14:31:58 -0600},
+ Howpublished = {\url{http://www.openmp.org/mp-documents/spec25.pdf}},
+ Key = {openmp},
+ Title = {{OpenMP Application Program Interface -- Version 2.5}},
+ Year = {2005}}
+
+ at inproceedings{1341845,
+ Address = {New York, NY, USA},
+ Author = {O. Weidner and J.-C. Bidal},
+ Booktitle = {MG '08: Proceedings of the 15th ACM Mardi Gras conference},
+ Date-Added = {2008-02-28 10:12:30 -0600},
+ Date-Modified = {2008-06-16 10:29:13 +0200},
+ Doi = {http://doi.acm.org/10.1145/1341811.1341845},
+ Isbn = {978-1-59593-835-0},
+ Location = {Baton Rouge, Louisiana},
+ Pages = {1--1},
+ Publisher = {ACM},
+ Title = {{Shrimp Farming on the Grid}},
+ Year = {2008},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/1341811.1341845}}
+
+ at misc{buyya00nimrodg,
+ Author = {Rajkumar Buyya and David Abramson and Jonathan Giddy},
+ Date-Added = {2008-02-28 10:10:53 -0600},
+ Date-Modified = {2008-05-20 09:06:31 +0200},
+ Text = {R. Buyya, D. Abramson, and J. Giddy, Nimrod/G: An Architecture for a Resource Management and Scheduling System in a Global Computational Grid, HPC ASIA'2000, China, IEEE CS Press, USA, 2000.},
+ Title = {{Nimrod/G: An Architecture for a Resource Management and Scheduling System in a Global Computational Grid}},
+ Url = {citeseer.ist.psu.edu/buyya00nimrodg.html},
+ Year = {2000},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/buyya00nimrodg.html}}
+
+ at article{citeulike:291860,
+ Author = {Frey, J. and Tannenbaum, T. and Livny, M. and Foster, I. and Tuecke, S.},
+ Citeulike-Article-Id = {291860},
+ Date-Added = {2008-02-28 10:08:47 -0600},
+ Date-Modified = {2008-06-30 19:47:43 +0200},
+ OPTDoi = {10.1023/A:1015617019423},
+ Journal = {Cluster Computing},
+ Keywords = {grid, scheduling},
+ Month = {July},
+ Number = {3},
+ Pages = {237--246},
+ Priority = {2},
+ OPTTitle = {{Condor-G: A Computation Management Agent for Multi-Institutional Grids}},
+ OPTUrl = {http://dx.doi.org/10.1023/A:1015617019423},
+ Volume = {5},
+ Year = {2002},
+ OPTBdsk-Url-1 = {http://dx.doi.org/10.1023/A:1015617019423},
+ OPTBdsk-Url-2 = {http://dx.doi.org/10.1023/A:1015617019423}}
+
+ at article{chu:mrm,
+ Author = {C.T. Chu and S.K. Kim and Y.A. Lin and Y.Y. Yu and G. Bradski and A.Y. Ng and K. Olukotun},
+ Date-Added = {2008-02-28 10:00:00 -0600},
+ Date-Modified = {2008-02-28 10:00:00 -0600},
+ Keywords = {datamining machinelearning mapreduce multicore parallel },
+ Title = {{Map-Reduce for Machine Learning on Multicore}},
+ Year = {2006}}
+
+ at misc{Hadoop:2008mz,
+ Date-Added = {2008-02-28 09:37:07 -0600},
+ Date-Modified = {2008-02-28 09:48:11 -0600},
+ Howpublished = {\url{http://hadoop.apache.org/}},
+ Key = {Hadoop},
+ Title = {{Hadoop: Open Source Implementation of MapReduce}},
+ Year = {2008}}
+
+ at book{JainTheArt,
+ Author = {Raj Jain},
+ Date-Added = {2008-02-24 21:58:31 -0600},
+ Date-Modified = {2008-02-24 21:58:31 -0600},
+ Month = {May},
+ Pages = {685},
+ Publisher = {Wiley},
+ Title = {The Art of Computer Systems Performance Analysis: Techniques for Experimental Design, Measurement, Simulation, and Modeling},
+ Year = {1991}}
+
+ at techreport{WSSG,
+ Date-Added = {2008-02-24 21:57:56 -0600},
+ Date-Modified = {2008-05-16 12:08:02 +0200},
+ Institution = {OASIS},
+ Key = {WSSG},
+ Title = {{Web Services Service Group 1.2 (WS-ServiceGroup)}},
+ Type = {Committee Specification},
+ Year = {2006}}
+
+ at techreport{WSBF,
+ Date-Added = {2008-02-24 21:57:42 -0600},
+ Date-Modified = {2008-02-24 21:57:42 -0600},
+ Institution = {OASIS Open},
+ Key = {WSBF},
+ Title = {Web Services Base Faults 1.2 (WS-BaseFaults)},
+ Type = {Committee Specification},
+ Year = {9 January 2006}}
+
+ at techreport{WSR,
+ Date-Added = {2008-02-24 21:57:32 -0600},
+ Date-Modified = {2008-02-24 21:57:32 -0600},
+ Institution = {OASIS Open},
+ Key = {WSR},
+ Number = {02},
+ Title = {Web Services Resource 1.2 (WS-Resource)},
+ Type = {Public Review Draft},
+ Year = {6 October 2005}}
+
+ at techreport{WSRP,
+ Date-Added = {2008-02-24 21:57:22 -0600},
+ Date-Modified = {2008-05-16 12:00:50 +0200},
+ Institution = {OASIS},
+ Key = {WSRP},
+ Title = {{Web Services Resource Properties 1.2 (WS-ResourceProperties)}},
+ Type = {Committee Specification},
+ Year = {2006}}
+
+ at techreport{WSRL,
+ Date-Added = {2008-02-24 21:57:10 -0600},
+ Date-Modified = {2008-05-16 12:08:28 +0200},
+ Institution = {OASIS},
+ Key = {WSRL},
+ Title = {{Web Services Resource Lifetime 1.2 (WS-ResourceLifetime)}},
+ Type = {Committee Specification},
+ Year = {2006}}
+
+ at misc{AMIGA,
+ Author = {Alexander Knebe},
+ Date-Added = {2008-02-24 21:55:22 -0600},
+ Date-Modified = {2008-05-23 11:35:36 +0200},
+ Howpublished = {\href{http://www.aip.de/People/AKnebe/AMIGA/}{http://www.aip.de/People/AKnebe/AMIGA/}},
+ Key = {AMIGA},
+ Title = {{AMIGA Project Homepage}},
+ Year = {2008}}
+
+ at article{MLAPM,
+ Author = {Alexander Knebe and Andrew Green and James Binney},
+ Date-Added = {2008-02-24 21:55:12 -0600},
+ Date-Modified = {2008-05-08 17:35:54 +0200},
+ Howpublished = {\url{http://www.aip.de/People/AKnebe/MLAPM/download/mlapm-paper.pdf}},
+ Journal = {Monthly Notices of the Royal Astronomical Society},
+ Pages = {845--864},
+ Title = {{MLAPM - A C Code For Cosmological Simulations}},
+ Volume = {325},
+ Year = {2001}}
+
+ at techreport{WSRT,
+ Author = {Brian Reistad and Bryan Murray and Doug Davis and Ian Robinson and Raymond McCollum and Alexander Nosov and Steve Graham and Vijay Tewari and William Vambenepe},
+ Date-Added = {2008-02-24 21:49:05 -0600},
+ Date-Modified = {2008-05-20 09:14:56 +0200},
+ Institution = {IBM, HP, Microsoft, Intel},
+ Month = {August},
+ Number = {1.0},
+ Title = {{Web Services Resource Transfer (WS-RT)}},
+ Year = {2006}}
+
+ at article{642200,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Jeffrey O. Kephart and David M. Chess},
+ Date-Added = {2008-02-24 20:52:47 -0600},
+ Date-Modified = {2008-02-24 21:06:04 -0600},
+ Doi = {http://dx.doi.org/10.1109/MC.2003.1160055},
+ Issn = {0018-9162},
+ Journal = {Computer},
+ Number = {1},
+ Pages = {41--50},
+ Publisher = {IEEE Computer Society Press},
+ Title = {{The Vision of Autonomic Computing}},
+ Volume = {36},
+ Year = {2003},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/MC.2003.1160055}}
+
+ at misc{Foster:2006fy,
+ Author = {I. Foster and H. Kishimoto and D. Berry and A. Djaoui and A. Grimshaw and B. Horn and F. Maciel and F. Siebenlist and R. Subramaniam and J. Treadwell and J. Von Reich},
+ Date-Added = {2008-02-24 20:36:14 -0600},
+ Date-Modified = {2008-02-24 20:38:46 -0600},
+ Howpublished = {Grid Forum Document GFD.80},
+ Note = {Global Grid Forum},
+ Title = {{The Open Grid Services Architecture, Version 1.5}},
+ Year = 2006}
+
+ at misc{saga-req,
+ Author = {A. Merzky and S. Jha},
+ Date-Added = {2008-02-24 20:36:03 -0600},
+ Date-Modified = {2008-05-25 13:45:57 +0200},
+ Howpublished = {Open Grid Forum Document GFD.71},
+ Note = {Open Grid Forum},
+ Title = {{A Requirements Analysis for a Simple API for Grid Applications}},
+ Year = 2006}
+
+ at misc{saga-uc,
+ Author = {A. Merzky and S. Jha},
+ Date-Added = {2008-02-24 20:35:56 -0600},
+ Date-Modified = {2008-05-25 13:46:49 +0200},
+ Howpublished = {Open Grid Forum Document GFD.70},
+ Note = {Open Grid Forum},
+ Title = {{A Collection of Use Cases for a Simple API for Grid Applications}},
+ Year = 2006}
+
+ at misc{gridcpr,
+ Author = {D. Simmel and T. Kielmann},
+ Date-Added = {2008-02-24 20:35:42 -0600},
+ Date-Modified = {2008-05-25 13:49:25 +0200},
+ Editor = {N. Stone and A. Merzky},
+ Howpublished = {Grid Forum Document GFD.93},
+ Month = {May},
+ Note = {Open Grid Forum},
+ Title = {{An Architecture for Grid Checkpoint and Recovery Services}},
+ Year = 2007}
+
+ at misc{cpr-uc,
+ Author = {R. Badia and R. Hood and T. Kielmann and A. Merzky and C. Morin and S. Pickles and M. Sgaravatto and P. Stodghill and Nathan Stone and Heon Y. Yeom},
+ Date-Added = {2008-02-24 20:35:32 -0600},
+ Date-Modified = {2008-05-25 13:49:05 +0200},
+ Howpublished = {Grid Forum Document GFD.92},
+ Month = {May},
+ Note = {Open Grid Forum},
+ Title = {{Use Cases for Grid Checkpoint and Recovery}},
+ Year = 2007}
+
+ at misc{JSDL,
+ Author = {A. Anjomshoaa and F. Brisard and M. Drescher and D. Fellows and A. Ly and S. McGough and D. Pulsipher},
+ Date-Added = {2008-02-24 20:33:57 -0600},
+ Date-Modified = {2008-07-20 14:17:35 +0200},
+ Howpublished = {\url{http://www.gridforum.org/documents/GFD.56.pdf}},
+ Title = {{Job Submission Description Language (JSDL) Specification 1.0}},
+ Year = {2005}}
+
+ at misc{Matsuoko:2008vl,
+ Author = {Satoshi Matsuoka},
+ Date-Added = {2008-02-23 23:18:47 -0600},
+ Date-Modified = {2008-02-28 13:46:05 -0600},
+ Howpublished = {\url{http://www.mardigrasconference.org/slides/a4-matsuoka.pdf}},
+ Month = {01},
+ Title = {{To Distribute or Not to Distribute, That is the Question in Petascale and Beyond (Keynote)}},
+ Year = {2008}}
+
+ at inproceedings{1251264,
+ Address = {Berkeley, CA, USA},
+ Author = {Jeffrey Dean and Sanjay Ghemawat},
+ Booktitle = {OSDI'04: Proceedings of the 6th conference on Symposium on Operating Systems Design \& Implementation},
+ Date-Added = {2008-02-23 20:47:01 -0600},
+ Date-Modified = {2008-05-12 23:43:10 +0200},
+ Location = {San Francisco, CA},
+ Pages = {137--150},
+ Publisher = {USENIX Association},
+ Title = {{MapReduce: Simplified Data Processing on Large Clusters}},
+ Year = {2004}}
+
+ at incollection{Allen:2003rw,
+ Author = {G. Allen and T. Goodale and M. Russell and E. Seidel and J. Shalf},
+ Booktitle = {Grid Computing: Making the Global Infrastructure a Reality},
+ Date-Added = {2008-02-23 19:55:21 -0600},
+ Date-Modified = {2008-06-19 09:11:18 +0200},
+ Editor = {Berman, F. and Fox, G. and Hey, T.},
+ Keywords = {WP3},
+ Pages = {555--578},
+ Publisher = {Wiley},
+ Title = {{Classifying and Enabling Grid Applications}},
+ Year = {2003}}
+
+ at incollection{LeeTalGC03,
+ Author = {Lee, Craig and Talia, Domenico},
+ Booktitle = {Grid Computing: Making the Global Infrastructure a Reality},
+ Date-Added = {2008-02-23 19:53:26 -0600},
+ Date-Modified = {2008-02-23 19:53:26 -0600},
+ Editor = {Berman, F. and Fox, G. and Hey, T.},
+ Keywords = {WP3},
+ Pages = {555--578},
+ Publisher = {Wiley},
+ Title = {Grid Programming Models: Current Tools, Issues and Directions},
+ Year = {2003}}
+
+ at inproceedings{conf/pvm/DuarteRL06,
+ Author = {Angelo Duarte and Dolores Rexachs and Emilio Luque},
+ Booktitle = {Proceedings of the 13th EuroPVM/MPI},
+ Date = {2006-11-02},
+ Date-Added = {2008-02-22 14:24:02 -0600},
+ Date-Modified = {2008-04-07 22:21:13 -0400},
+ Description = {dblp},
+ Ee = {http://dx.doi.org/10.1007/11846802_26},
+ Isbn = {3-540-39110-X},
+ Keywords = {dblp },
+ Pages = {150--157},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {{An Intelligent Management of Fault Tolerance in Cluster Using RADICMPI}},
+ Url = {http://dblp.uni-trier.de/db/conf/pvm/pvm2006.html#DuarteRL06},
+ Volume = {4192},
+ Year = {2006},
+ Bdsk-Url-1 = {http://dblp.uni-trier.de/db/conf/pvm/pvm2006.html#DuarteRL06}}
+
+ at book{573776,
+ Address = {Secaucus, NJ, USA},
+ Date-Added = {2008-02-18 22:02:06 -0600},
+ Date-Modified = {2008-05-19 11:55:37 +0200},
+ Editor = {J. C. Laprie and A. Avi{\v{z}}ienis and H. Kopetz},
+ Isbn = {0387822968},
+ Publisher = {Springer-Verlag New York, Inc.},
+ Title = {Dependability: Basic Concepts and Terminology},
+ Year = {1991}}
+
+ at book{Hanmer:2007kx,
+ Address = {West Sussex, England},
+ Author = {R. Hanmer},
+ Date-Added = {2008-02-18 21:54:10 -0600},
+ Date-Modified = {2008-06-22 18:11:13 +0200},
+ Keywords = {Fault Tolerance},
+ Publisher = {John Wiley \& Sons Ltd},
+ Title = {{Patterns For Fault Tolerant Software}},
+ Year = {2007}}
+
+ at article{1742-6596-78-1-012022,
+ Author = {B. Schroeder and G. Gibson},
+ Date-Added = {2008-02-18 21:21:25 -0600},
+ Date-Modified = {2008-06-16 10:35:45 +0200},
+ Journal = {Journal of Physics: Conference Series},
+ Pages = {012022 (11pp)},
+ Title = {{Understanding Failures in Petascale Computers}},
+ Url = {http://stacks.iop.org/1742-6596/78/012022},
+ Volume = {78},
+ Year = {2007},
+ Bdsk-Url-1 = {http://stacks.iop.org/1742-6596/78/012022}}
+
+ at inproceedings{DBLP:conf/iccS/Allen07,
+ Author = {G. Allen},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {International Conference on Computational Science (1)},
+ Crossref = {DBLP:conf/iccS/2007-1},
+ Date-Added = {2008-02-16 23:35:54 -0600},
+ Date-Modified = {2008-06-19 09:12:41 +0200},
+ Ee = {http://dx.doi.org/10.1007/978-3-540-72584-8_136},
+ Pages = {1034--1041},
+ Title = {Building a Dynamic Data Driven Application System for Hurricane Forecasting},
+ Year = {2007}}
+
+ at proceedings{DBLP:conf/iccS/2007-1,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {ICCS (1)},
+ Date-Added = {2008-02-16 23:35:54 -0600},
+ Date-Modified = {2008-02-16 23:35:54 -0600},
+ Editor = {Yong Shi and G. Dick van Albada and Jack Dongarra and Peter M. A. Sloot},
+ Isbn = {978-3-540-72583-1},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Computational Science - ICCS 2007, 7th International Conference Beijing, China, May 27-30, 2007, Proceedings, Part I},
+ Volume = {4487},
+ Year = {2007}}
+
+ at misc{Arnaud:2008uf,
+ Author = {Bill St. Arnaud},
+ Date-Added = {2008-02-16 22:58:25 -0600},
+ Date-Modified = {2008-02-16 23:12:57 -0600},
+ Howpublished = {\url{http://www.mardigrasconference.org/slides/dpa-st-arnaud.pdf}},
+ Title = {{How Cyber-Infrastructure Can Help Reduce Global Warming (Invited Talk)}},
+ Year = {2008}}
+
+ at misc{Carr:2008gd,
+ Author = {N. Carr},
+ Date-Added = {2008-02-16 22:47:02 -0600},
+ Date-Modified = {2008-06-22 18:27:11 +0200},
+ Howpublished = {\url{http://www.roughtype.com/archives/2008/02/amazons_s3_util.php}},
+ Title = {{Crash: Amazon's S3 utility goes down}},
+ Year = {2008}}
+
+ at article{1016902,
+ Address = {Hingham, MA, USA},
+ Author = {Rajanikanth Batchu and Yoginder S. Dandass and Anthony Skjellum and Murali Beddhu},
+ Date-Added = {2008-02-16 22:25:59 -0600},
+ Date-Modified = {2008-02-16 22:25:59 -0600},
+ Doi = {http://dx.doi.org/10.1023/B:CLUS.0000039491.64560.8a},
+ Issn = {1386-7857},
+ Journal = {Cluster Computing},
+ Number = {4},
+ Pages = {303--315},
+ Publisher = {Kluwer Academic Publishers},
+ Title = {{MPI/FT: A Model-Based Approach to Low-Overhead Fault Tolerant Message-Passing Middleware}},
+ Volume = {7},
+ Year = {2004},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1023/B:CLUS.0000039491.64560.8a}}
+
+ at inproceedings{746632,
+ Address = {London, UK},
+ Author = {Graham E. Fagg and Jack Dongarra},
+ Booktitle = {Proceedings of the 7th European PVM/MPI Users' Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface},
+ Date-Added = {2008-02-16 22:25:59 -0600},
+ Date-Modified = {2008-02-16 22:25:59 -0600},
+ Isbn = {3-540-41010-4},
+ Pages = {346--353},
+ Publisher = {Springer-Verlag},
+ Title = {{FT-MPI: Fault Tolerant MPI, Supporting Dynamic Applications in a Dynamic World}},
+ Year = {2000}}
+
+ at inproceedings{1341831,
+ Address = {New York, NY, USA},
+ Author = {C. D. Ott and E. Schnetter and G. Allen and E. Seidel and J. Tao and B. Zink},
+ Booktitle = {MG '08: Proceedings of the 15th ACM Mardi Gras conference},
+ Date-Added = {2008-02-16 21:16:16 -0600},
+ Date-Modified = {2008-06-22 17:47:00 +0200},
+ Doi = {http://doi.acm.org/10.1145/1341811.1341831},
+ Isbn = {978-1-59593-835-0},
+ Location = {Baton Rouge, Louisiana},
+ Pages = {1--9},
+ Publisher = {ACM},
+ Title = {{A Case Study For Petascale Applications in Astrophysics: Simulating Gamma-Ray Bursts}},
+ Year = {2008},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/1341811.1341831}}
+
+ at phdthesis{fielding00,
+ Abstract = {The World Wide Web has succeeded in large part because its software architecture has been designed to meet the needs of an Internet-scale distributed hypermedia system. The Web has been iteratively developed over the past ten years through a series of modifications to the standards that define its architecture. In order to identify those aspects of the Web that needed improvement and avoid undesirable modifications, a model for the modern Web architecture was needed to guide its design, definition, and deployment.
+
+Software architecture research investigates methods for determining how best to partition a system, how components identify and communicate with each other, how information is communicated, how elements of a system can evolve independently, and how all of the above can be described using formal and informal notations. My work is motivated by the desire to understand and evaluate the architectural design of network-based application software through principled use of architectural constraints, thereby obtaining the functional, performance, and social properties desired of an architecture. An architectural style is a named, coordinated set of architectural constraints.
+
+This dissertation defines a framework for understanding software architecture via architectural styles and demonstrates how styles can be used to guide the architectural design of network-based application software. A survey of architectural styles for network-based applications is used to classify styles according to the architectural properties they induce on an architecture for distributed hypermedia. I then introduce the Representational State Transfer (REST) architectural style and describe how REST has been used to guide the design and development of the architecture for the modern Web.
+
+REST emphasizes scalability of component interactions, generality of interfaces, independent deployment of components, and intermediary components to reduce interaction latency, enforce security, and encapsulate legacy systems. I describe the software engineering principles guiding REST and the interaction constraints chosen to retain those principles, contrasting them to the constraints of other architectural styles. Finally, I describe the lessons learned from applying REST to the design of the Hypertext Transfer Protocol and Uniform Resource Identifier standards, and from their subsequent deployment in Web client and server software.},
+ Author = {R. Fielding},
+ Biburl = {http://www.bibsonomy.org/bibtex/28e43e9d058f60876e5dd1f9cfcaff933/neilernst},
+ Citeulike-Article-Id = {125581},
+ Date-Added = {2008-02-16 18:59:30 -0600},
+ Date-Modified = {2008-05-25 13:55:33 +0200},
+ Keywords = {architecture rest services web },
+ Priority = {4},
+ School = {University of California, Irvine},
+ Title = {{Architectural Styles and the Design of Network-based Software Architectures}},
+ Url = {http://www.ics.uci.edu/~fielding/pubs/dissertation/top.htm},
+ Year = {2000},
+ Bdsk-Url-1 = {http://www.ics.uci.edu/~fielding/pubs/dissertation/top.htm}}
+
+ at inproceedings{1333540,
+ Address = {Washington, DC, USA},
+ Author = {S. Jha and H. Kaiser and A. Merzky and O. Weidner},
+ Booktitle = {E-SCIENCE '07: Proceedings of the Third IEEE International Conference on e-Science and Grid Computing (e-Science 2007)},
+ Date-Added = {2008-02-16 18:54:59 -0600},
+ Date-Modified = {2008-05-25 13:46:16 +0200},
+ Doi = {http://dx.doi.org/10.1109/E-SCIENCE.2007.39},
+ Isbn = {0-7695-3064-8},
+ Pages = {584--591},
+ Publisher = {IEEE Computer Society},
+ Title = {Grid Interoperability at the Application Level Using SAGA},
+ Year = {2007},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/E-SCIENCE.2007.39}}
+
+ at misc{ogf08,
+ Date-Added = {2008-02-16 18:20:32 -0600},
+ Date-Modified = {2008-05-17 21:51:27 +0200},
+ Howpublished = {\url{http://www.ogf.org}},
+ Key = {OGF},
+ Title = {{Open Grid Forum}},
+ Year = {2008}}
+
+ at misc{Taylor:1999kx,
+ Author = {J. Taylor},
+ Date-Added = {2008-02-16 17:51:17 -0600},
+ Date-Modified = {2008-06-16 10:34:34 +0200},
+ Howpublished = {\url{http://www.nesc.ac.uk/nesc/define.html}},
+ Lastchecked = {02/2008},
+ Title = {{e-Science Definition}},
+ Url = {http://www.nesc.ac.uk/nesc/define.html},
+ Year = {1999},
+ Bdsk-Url-1 = {http://www.nesc.ac.uk/nesc/define.html}}
+
+ at inproceedings{Graham:2006qr,
+ Address = {Bonn, Germany},
+ Author = {Richard Graham},
+ Booktitle = {Invited Talk presented at Euro PVM/MPI},
+ Date-Added = {2008-01-12 16:13:33 +0100},
+ Date-Modified = {2008-01-12 16:14:34 +0100},
+ Title = {Approaches for Parallel Application Fault Tolerance},
+ Year = {2006}}
+
+ at inproceedings{castain05:_open_rte,
+ Address = {Sorrento, Italy},
+ Author = {R. H. Castain and T. S. Woodall and D. J. Daniel and J. M. Squyres and B. Barrett and G. E. Fagg},
+ Booktitle = {Proceedings, 12th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2008-01-12 15:43:32 +0100},
+ Date-Modified = {2008-01-17 16:48:44 +0100},
+ Month = {September},
+ Title = {{The Open Run-Time Environment (OpenRTE): A Transparent Multi-Cluster Environment for High-Performance Computing}},
+ Year = 2005}
+
+ at inproceedings{gabriel04openmpi,
+ Address = {Budapest, Hungary},
+ Author = {Gabriel, Edgar and Fagg, Graham E. and Bosilca, George and Angskun, Thara and Dongarra, Jack J. and Squyres, Jeffrey M. and Sahay, Vishal and Kambadur, Prabhanjan and Barrett, Brian and Lumsdaine, Andrew and Castain, Ralph H. and Daniel, David J. and Graham, Richard L. and Woodall, Timothy S. },
+ Booktitle = {Proceedings, 11th European PVM/MPI Users' Group Meeting},
+ Citeulike-Article-Id = {1681048},
+ Date-Added = {2008-01-06 13:07:04 +0100},
+ Date-Modified = {2008-01-21 16:48:19 +0100},
+ Keywords = {mpi},
+ Month = {September},
+ Pages = {97--104},
+ Priority = {0},
+ Title = {{Open {MPI}: Goals, Concept, and Design of a Next Generation {MPI} Implementation}},
+ Year = {2004}}
+
+ at inproceedings{Luckow:2008ys,
+ Address = {Miami, USA},
+ Author = {A. Luckow and B. Schnor},
+ Booktitle = {Proceedings of Fifth High-Performance Grid Computing Workshop in conjunction with IEEE International Parallel \& Distributed Processing Symposium},
+ Date-Added = {2008-01-03 16:12:08 +0100},
+ Date-Modified = {2008-05-24 18:56:06 +0200},
+ Read = {Yes},
+ Title = {{Service Replication in Grids: Ensuring Consistency in a Dynamic, Failure-Prone Environment}},
+ Year = {2008}}
+
+ at article{DBLP:journals/tods/KungR81,
+ Author = {H. Kung and J. Robinson},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2008-01-03 11:53:15 +0100},
+ Date-Modified = {2008-06-19 16:17:36 +0200},
+ Ee = {http://doi.acm.org/10.1145/319566.319567, db/journals/tods/KungR81.html},
+ Journal = {ACM Trans. Database Syst.},
+ Keywords = {concurrency, Fault Tolerance},
+ Number = {2},
+ Pages = {213--226},
+ Title = {{On Optimistic Methods for Concurrency Control}},
+ Volume = {6},
+ Year = {1981}}
+
+ at inproceedings{la-mpi,
+ Author = {R. T. Aulwes and D. J. Daniel and N. N. Desai and R. L. Graham and L. D. Risinger and M. W. Sukalski and M. A. Taylor},
+ Booktitle = {Euro PVM/MPI 2003},
+ Date-Modified = {2008-06-16 10:34:04 +0200},
+ Doi = {10.1007/b14070},
+ Keywords = {MPI, Fault Tolerance},
+ Owner = {MCS},
+ Pages = {344--351},
+ Pdf = {http://public.lanl.gov/lampi/LA-UR-03-2939.pdf},
+ Publisher = {Springer},
+ Series = {LNCS},
+ Timestamp = {20071029},
+ Title = {Network Fault Tolerance in LA-MPI},
+ Url = {http://public.lanl.gov/lampi/LA-UR-03-2939.pdf},
+ Volume = {2840/2003},
+ Year = {2003},
+ Bdsk-Url-1 = {http://public.lanl.gov/lampi/LA-UR-03-2939.pdf},
+ Bdsk-Url-2 = {http://dx.doi.org/10.1007/b14070}}
+
+ at inproceedings{harness,
+ Author = {M. Beck and J. Dongarra and G. Fagg and G. Geist and P. Gray and J. Kohl and M. Migliardi and K. Moore and T. Moore and P. Papadopoulos and S. Scott and V. Sunderam},
+ Booktitle = {Future Generation Computer Systems},
+ Date-Modified = {2008-05-24 18:54:15 +0200},
+ Month = oct,
+ Owner = {MCS},
+ Pages = {571--582},
+ Timestamp = {20071115},
+ Title = {{HARNESS}: A Next Generation Distributed Virtual Machine},
+ Url = {http://www.netlib.org/utk/people/JackDongarra/PAPERS/harness2.ps},
+ Volume = {15(5-6)},
+ Year = {1999},
+ Bdsk-Url-1 = {http://www.netlib.org/utk/people/JackDongarra/PAPERS/harness2.ps}}
+
+ at article{mpichv2,
+ Address = {Los Alamitos, CA, USA},
+ Author = {G. Bosilca and A. Bouteiller and F. Cappello and S. Djilali and G. Fedak and C. Germain and T. Herault and P. Lemarinier and O. Lodygensky and F. Magniette and V. Neri and A. Selikhov},
+ Date-Modified = {2008-06-27 23:38:54 +0200},
+ Doi = {http://doi.ieeecomputersociety.org/10.1109/SC.2002.10048},
+ Issn = {1063-9535},
+ Journal = {Proceedings of The IEEE/ACM SC2002 Conference},
+ Owner = {MCS},
+ Publisher = {IEEE Computer Society},
+ Timestamp = {20070903},
+ Title = {{MPICH-V: Toward a Scalable Fault Tolerant MPI for Volatile Nodes}},
+ Year = {2002},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/SC.2002.10048}}
+
+ at article{mpichv,
+ Author = {A. Bouteiller and T. Herault and G. Krawezik and P. Lemarinier and F. Cappello},
+ Date-Modified = {2008-05-26 22:18:42 +0200},
+ Journal = {International Journal of High Performance Computing and Applications},
+ Owner = {MCS},
+ Pages = {319--333},
+ Pdf = {http://mpich-v.lri.fr/papers/ijhpca_mpichv.pdf},
+ Timestamp = {20071021},
+ Title = {{MPICH-V: A Multiprotocol Fault Tolerant MPI}},
+ Url = {http://mpich-v.lri.fr/papers/ijhpca_mpichv.pdf},
+ Volume = {20 (3)},
+ Year = {2006},
+ Bdsk-Url-1 = {http://mpich-v.lri.fr/papers/ijhpca_mpichv.pdf}}
+
+ at misc{rfc1122,
+ Author = {R. Braden},
+ Howpublished = {RFC 1122 (Standard)},
+ Month = oct,
+ Number = {1122},
+ Organisation = {Internet Engineering Task Force},
+ Owner = {MCS},
+ Publisher = {IETF},
+ Series = {Request for Comments},
+ Timestamp = {20071118},
+ Title = {{Requirements for Internet Hosts - Communication Layers}},
+ Url = {http://www.ietf.org/rfc/rfc1122.txt},
+ Year = {1989},
+ Bdsk-Url-1 = {http://www.ietf.org/rfc/rfc1122.txt}}
+
+ at inproceedings{mpd,
+ Author = {Ralph Butler and William Gropp and Ewing Lusk},
+ Booktitle = {Recent Advances in Parallel Virtual Machine and Message Passing Interface: 7th European PVM/MPI Users' Group Meeting, Balatonf{\"u}red, Hungary, September 2000. Proceedings},
+ Issn = {0302-9743},
+ Pages = {168+},
+ Publisher = {Springer Berlin/Heidelberg},
+ Series = {LNCS},
+ Timestamp = {20070903},
+ Title = {A Scalable Process-Management Environment for Parallel Programs},
+ Url = {http://www.springerlink.com/content/myr2r8at0183gja6/},
+ Volume = {1908},
+ Year = {2000},
+ Bdsk-Url-1 = {http://www.springerlink.com/content/myr2r8at0183gja6/}}
+
+ at inproceedings{mpichv-pcl,
+ Author = {Camille Coti and Thomas Herault and Pierre Lemarinier and Laurence Pilard and Ala Rezmerita and Eric Rodriguez and Franck Cappello},
+ Booktitle = {Supercomputing 2006 (SC06)},
+ Owner = {MCS},
+ Pdf = {http://sc06.supercomputing.org/schedule/pdf/pap310.pdf},
+ Timestamp = {20071208},
+ Title = {Blocking vs. Non-Blocking Coordinated Checkpointing for Large-Scale Fault Tolerant MPI},
+ Url = {http://sc06.supercomputing.org/schedule/event_detail.php?evid=9172},
+ Year = {2006},
+ Bdsk-Url-1 = {http://sc06.supercomputing.org/schedule/event_detail.php?evid=9172}}
+
+ at misc{PKnoppix,
+ Author = {Michael Creel},
+ Owner = {MCS},
+ Timestamp = {20071212},
+ Title = {ParallelKnoppix},
+ Url = {http://idea.uab.es/mcreel/ParallelKnoppix/},
+ Year = {2007},
+ Bdsk-Url-1 = {http://idea.uab.es/mcreel/ParallelKnoppix/}}
+
+ at inproceedings{ftmpi,
+ Author = {Fagg, G. and Gabriel, E. and Chen, Z. and Angskun, T. and Bosilca, G. and Pjesivac-Grbovic, J. and Dongarra, J.},
+ Booktitle = {International Journal for High Performance Applications and Supercomputing},
+ Month = apr,
+ Owner = {MCS},
+ Pdf = {http://icl.cs.utk.edu/projectsfiles/lacsi/pubs/lacsi2003-ijhpca04.pdf},
+ Timestamp = {20071021},
+ Title = {Process Fault-Tolerance: Semantics, Design and Applications for High Performance Computing},
+ Url = {http://icl.cs.utk.edu/projectsfiles/lacsi/pubs/lacsi2003-ijhpca04.pdf},
+ Year = {2004},
+ Bdsk-Url-1 = {http://icl.cs.utk.edu/projectsfiles/lacsi/pubs/lacsi2003-ijhpca04.pdf}}
+
+ at inproceedings{extftmpi,
+ Author = {Graham E. Fagg and Thara Angskun and George Bosilca and Jelena Pjesivac-Grbovic and Jack J. Dongarra},
+ Booktitle = {Euro PVM/MPI 2005},
+ Doi = {10.1007/11557265_13},
+ Month = oct,
+ Owner = {MCS},
+ Pages = {67--75},
+ Pdf = {http://icl.cs.utk.edu/projectsfiles/rib/pubs/sftmpi-europvm-mpi-2005.pdf},
+ Timestamp = {20071115},
+ Title = {Scalable Fault Tolerant MPI: Extending the recovery algorithm},
+ Url = {http://icl.cs.utk.edu/projectsfiles/rib/pubs/sftmpi-europvm-mpi-2005.pdf},
+ Year = {2005},
+ Bdsk-Url-1 = {http://icl.cs.utk.edu/projectsfiles/rib/pubs/sftmpi-europvm-mpi-2005.pdf},
+ Bdsk-Url-2 = {http://dx.doi.org/10.1007/11557265_13}}
+
+ at inproceedings{ftmpi-ext,
+ Address = {Heidelberg, Germany},
+ Author = {Graham E. Fagg and Edgar Gabriel and George Bosilca and Thara Angskun and Zhizhong Chen and Jelena Pjesivac-Grbovic and Kevin London and Jack J. Dongarra},
+ Booktitle = {Proceedings of ISC2004},
+ Date-Modified = {2008-01-21 16:47:48 +0100},
+ Month = jun,
+ Owner = {MCS},
+ Pdf = {http://icl.cs.utk.edu/projectsfiles/ftmpi/pubs/isc2004-FT-MPI.pdf},
+ Timestamp = {20071021},
+ Title = {{Extending the MPI Specification for Process Fault Tolerance on High Performance Computing Systems}},
+ Url = {http://icl.cs.utk.edu/projectsfiles/ftmpi/pubs/isc2004-FT-MPI.pdf},
+ Year = {2004},
+ Bdsk-Url-1 = {http://icl.cs.utk.edu/projectsfiles/ftmpi/pubs/isc2004-FT-MPI.pdf}}
+
+ at inproceedings{ompi,
+ Address = {Budapest, Hungary},
+ Author = {Edgar Gabriel and Graham E. Fagg and George Bosilca and Thara Angskun and Jack J. Dongarra and Jeffrey M. Squyres and Vishal Sahay and Prabhanjan Kambadur and Brian Barrett and Andrew Lumsdaine and Ralph H. Castain and David J. Daniel and Richard L. Graham and Timothy S. Woodall},
+ Booktitle = {Proceedings of the 11th EuroPVM/MPI},
+ Date-Modified = {2008-04-07 22:21:47 -0400},
+ Month = sep,
+ Pages = {97--104},
+ Pdf = {http://www.open-mpi.org/papers/euro-pvmmpi-2004-overview/euro-pvmmpi-2004-overview.pdf},
+ Title = {Open {MPI}: Goals, Concept, and Design of a Next Generation {MPI} Implementation},
+ Url = {http://www.open-mpi.org/papers/euro-pvmmpi-2004-overview/},
+ Year = {2004},
+ Bdsk-Url-1 = {http://www.open-mpi.org/papers/euro-pvmmpi-2004-overview/}}
+
+ at inproceedings{ompi-cray,
+ Author = {Richard L. Graham and George Bosilca and Jelena Pje{\v{s}}ivac-Grbovi{\'{c}}},
+ Booktitle = {CUG07},
+ Owner = {MCS},
+ Timestamp = {20071120},
+ Title = {A Comparison of Application Performance Using Open {MPI} and {Cray} {MPI}},
+ Url = {http://icl.cs.utk.edu/projectsfiles/ftmpi/pubs/graham_et-al_application_perf_using_ompi_and_craympi_CUG07.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://icl.cs.utk.edu/projectsfiles/ftmpi/pubs/graham_et-al_application_perf_using_ompi_and_craympi_CUG07.pdf}}
+
+ at inproceedings{ompi2,
+ Address = {Poznan, Poland},
+ Author = {Richard L. Graham and Timothy S. Woodall and Jeffrey M. Squyres},
+ Booktitle = {Proceedings, 6th Annual International Conference on Parallel Processing and Applied Mathematics},
+ Month = sep,
+ Owner = {MCS},
+ Pdf = {http://www.open-mpi.org/papers/ppam-2005/ppam-2005.pdf},
+ Timestamp = {20071116},
+ Title = {Open {MPI}: A Flexible High Performance {MPI}},
+ Url = {http://www.open-mpi.org/papers/ppam-2005/},
+ Year = {2005},
+ Bdsk-Url-1 = {http://www.open-mpi.org/papers/ppam-2005/}}
+
+ at article{Gropp04,
+ Author = {William Gropp and Ewing Lusk},
+ Issn = {1094-3420},
+ Journal = {International Journal of High Performance Computing Applications},
+ Pages = {363--372},
+ Pdf = {http://www-unix.mcs.anl.gov/~lusk/papers/fault-tolerance.pdf},
+ Publisher = {Sage Publications Ltd.},
+ Timestamp = {20070903},
+ Title = {Fault Tolerance in Message Passing Interface Programs},
+ Url = {http://www-unix.mcs.anl.gov/~lusk/papers/fault-tolerance.pdf},
+ Number = {3},
+ Volume = {18},
+ Year = {2004},
+ Bdsk-Url-1 = {http://www-unix.mcs.anl.gov/~lusk/papers/fault-tolerance.pdf}}
+
+ at book{Gropp99,
+ Author = {William Gropp and Anthony Skjellum and Ewing Lusk},
+ Owner = {MCS},
+ Publisher = {The MIT Press},
+ Timestamp = {20070903},
+ Title = {Using {MPI}},
+ Year = {1999}}
+
+ at book{Gropp00,
+ Author = {William Gropp and Rajeev Thakur and Ewing Lusk},
+ Owner = {MCS},
+ Publisher = {The MIT Press},
+ Timestamp = {20070903},
+ Title = {Using {MPI-2}},
+ Year = {2000}}
+
+ at misc{ompi_ft_sc06,
+ Author = {Josh Hursey},
+ Howpublished = {Talk},
+ Month = nov,
+ Owner = {MCS},
+ Pdf = {http://www.open-mpi.org/papers/sc-2006/iu-booth-ft-in-ompi.pdf},
+ Timestamp = {20071116},
+ Title = {Dealing with Disaster: Fault Tolerance in Open {MPI}},
+ Url = {http://www.open-mpi.org/papers/sc-2006/iu-booth-ft-in-ompi.pdf},
+ Year = {2006},
+ Bdsk-Url-1 = {http://www.open-mpi.org/papers/sc-2006/iu-booth-ft-in-ompi.pdf}}
+
+ at techreport{OpenMPI-CRS,
+ Address = {Bloomington, Indiana, USA},
+ Author = {Joshua Hursey and Jeffrey M. Squyres and Andrew Lumsdaine},
+ Institution = {Indiana University},
+ Month = jul,
+ Number = {TR635},
+ Owner = {MCS},
+ Pdf = {http://www.open-mpi.org/papers/iu-cs-tr635/iu-cs-tr635.pdf},
+ Timestamp = {20071116},
+ Title = {A Checkpoint and Restart Service Specification for Open {MPI}},
+ Url = {http://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR635},
+ Year = {2006},
+ Bdsk-Url-1 = {http://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR635}}
+
+ at inproceedings{iosup07,
+ Author = {Alexandru Iosup and Mathieu Jan and Ozan Sonmez and Dick H. J. Epema},
+ Booktitle = {8th IEEE/ACM International Conference on Grid Computing (Grid 2007)},
+ Month = apr,
+ Pdf = {http://hal.inria.fr/docs/00/14/35/76/PDF/RR-6172.pdf},
+ Title = {{On the Dynamic Resource Availability in Grids}},
+ Url = {http://hal.inria.fr/inria-00143265/en/},
+ Year = {2007},
+ Bdsk-Url-1 = {http://hal.inria.fr/inria-00143265/en/}}
+
+ at inproceedings{jacobson88,
+ Address = {Stanford, CA},
+ Author = {Van Jacobson},
+ Booktitle = {{ACM} {SIGCOMM} '88},
+ Month = aug,
+ Pages = {314--329},
+ Title = {Congestion Avoidance and Control},
+ Url = {http://ee.lbl.gov/papers/congavoid.pdf},
+ Year = {1988},
+ Bdsk-Url-1 = {http://ee.lbl.gov/papers/congavoid.pdf}}
+
+ at inproceedings{abaris,
+ Author = {Jitsumoto, H. and Endo, T. and Matsuoka, S.},
+ Booktitle = {Parallel and Distributed Processing Symposium (IPDPS 2007)},
+ Doi = {10.1109/IPDPS.2007.370603},
+ Month = mar,
+ Owner = {MCS},
+ Pages = {1--8},
+ Pdf = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4228331},
+ Publisher = {IEEE International},
+ Timestamp = {20071021},
+ Title = {{ABARIS}: An Adaptable Fault Detection/Recovery Component Framework for {MPIs}},
+ Url = {http://matsu-www.is.titech.ac.jp/paper/jitsumoto/jitsumoto-dpdns2007-paper.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://matsu-www.is.titech.ac.jp/paper/jitsumoto/jitsumoto-dpdns2007-paper.pdf},
+ Bdsk-Url-2 = {http://dx.doi.org/10.1109/IPDPS.2007.370603}}
+
+ at misc{jails,
+ Author = {Poul-Henning Kamp and Robert N. M. Watson},
+ Howpublished = {presented at the 2nd International System Administration and Networking Conference (SANE 2000)},
+ Month = may,
+ Owner = {MCS},
+ Timestamp = {20071021},
+ Title = {Jails: Confining the omnipotent root},
+ Url = {http://phk.freebsd.dk/pubs/sane2000-jail.pdf},
+ Year = {2000},
+ Bdsk-Url-1 = {http://phk.freebsd.dk/pubs/sane2000-jail.pdf}}
+
+ at misc{TCPFailure,
+ Address = {Information Networking Institute, Carnegie Mellon University},
+ Author = {Christopher C. Lord},
+ Note = {(undated)},
+ Pdf = {http://chrisandtrudi.com/Chris/Portfolio/TCP%20Connection%20Survivability%20Paper.pdf},
+ Timestamp = {20070903},
+ Title = {The {TCP} Maximum Survivable Failure Time},
+ Url = {http://chrisandtrudi.com/Chris/Portfolio/TCP%20Connection%20Survivability%20Paper.pdf},
+ Bdsk-Url-1 = {http://chrisandtrudi.com/Chris/Portfolio/TCP%20Connection%20Survivability%20Paper.pdf}}
+
+ at inproceedings{migol,
+ Author = {A. Luckow and B. Schnor},
+ Booktitle = {Euro PVM/MPI 2005},
+ Date-Modified = {2008-05-24 18:56:19 +0200},
+ Doi = {10.1007/11557265_35},
+ Month = oct,
+ Owner = {MCS},
+ Pages = {258--267},
+ Publisher = {Springer},
+ Timestamp = {20071114},
+ Title = {Migol: A Fault-Tolerant Service Framework for {MPI} Applications in the Grid},
+ Url = {http://www.springerlink.com/content/4ga2adc9xbvakbhh/},
+ Volume = {3666},
+ Year = {2005},
+ Bdsk-Url-1 = {http://www.springerlink.com/content/4ga2adc9xbvakbhh/},
+ Bdsk-Url-2 = {http://dx.doi.org/10.1007/11557265_35}}
+
+ at book{McKusick04,
+ Author = {Marshall Kirk McKusick},
+ Owner = {MCS},
+ Publisher = {Addison-Wesley},
+ Timestamp = {20070916},
+ Title = {The Design and Implementation of the FreeBSD Operating System},
+ Year = {2004}}
+
+ at misc{mpich2,
+ Author = {MPICH2},
+ Date-Modified = {2008-03-28 22:24:23 +0100},
+ Howpublished = {\url{http://www-unix.mcs.anl.gov/mpi/mpich2/}},
+ Key = {MPICH2},
+ Owner = {MCS},
+ Publisher = {Argonne National Laboratory Group},
+ Timestamp = {20071021},
+ Title = {{MPICH2 Project Homepage}},
+ Year = {2008},
+ Bdsk-Url-1 = {http://www-unix.mcs.anl.gov/mpi/mpich2/}}
+
+ at misc{rfc2988,
+ Author = {V. Paxson and M. Allman},
+ Howpublished = {RFC 2988 (Proposed Standard)},
+ Month = nov,
+ Number = {2988},
+ Organization = {Internet Engineering Task Force},
+ Owner = {MCS},
+ Publisher = {IETF},
+ Series = {Request for Comments},
+ Timestamp = {20071118},
+ Title = {{Computing TCP's Retransmission Timer}},
+ Url = {http://www.ietf.org/rfc/rfc2988.txt},
+ Year = {2000},
+ Bdsk-Url-1 = {http://www.ietf.org/rfc/rfc2988.txt}}
+
+ at inproceedings{googledisks,
+ Address = {Berkeley, CA, USA},
+ Author = {Pinheiro, Eduardo and Weber, Wolf-Dietrich and Barroso, Luiz A. },
+ Booktitle = {FAST'07: Proceedings of the 5th USENIX Conference on File and Storage Technologies},
+ Citeulike-Article-Id = {1601766},
+ Keywords = {harddisk, storage, work},
+ Pages = {17--28},
+ Pdf = {http://labs.google.com/papers/disk_failures.pdf},
+ Priority = {2},
+ Publisher = {USENIX Association},
+ Title = {Failure trends in a large disk drive population},
+ Url = {http://labs.google.com/papers/disk_failures.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://labs.google.com/papers/disk_failures.pdf}}
+
+ at misc{rfc793,
+ Author = {Jon Postel},
+ Howpublished = {RFC 793 (Standard)},
+ Month = sep,
+ Number = {793},
+ Organization = {Internet Engineering Task Force},
+ Owner = {MCS},
+ Publisher = {IETF},
+ Series = {Request for Comments},
+ Timestamp = {20070916},
+ Title = {{Transmission Control Protocol}},
+ Url = {http://tools.ietf.org/html/rfc793},
+ Year = {1981},
+ Bdsk-Url-1 = {http://tools.ietf.org/html/rfc793}}
+
+ at inproceedings{Rewaskar,
+ Author = {S. Rewaskar and J. Kaur and F. Smith},
+ Booktitle = {Proceedings of 15th IEEE International Conference on Network Protocols (ICNP07)},
+ Date-Modified = {2008-06-30 19:40:01 +0200},
+ Owner = {MCS},
+ Pdf = {http://www.cs.unc.edu/~rewaskar/publication/ICNP_07.pdf},
+ Timestamp = {20071216},
+ Title = {A Performance Study of Loss Detection/Recovery in Real-world {TCP} Implementations},
+ Url = {http://www.cs.unc.edu/~rewaskar/publication/ICNP_07.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://www.cs.unc.edu/~rewaskar/publication/ICNP_07.pdf}}
+
+ at article{lam_check_restart,
+ Author = {Sriram Sankaran and Jeffrey M. Squyres and Brian Barrett and Andrew Lumsdaine and Jason Duell and Paul Hargrove and Eric Roman},
+ Date-Modified = {2008-05-08 18:17:29 +0200},
+ Journal = {International Journal of High Performance Computing Applications},
+ Keywords = {MPI, checkpoint/restart, rollback-recovery},
+ Month = {Winter},
+ Number = {4},
+ Owner = {MCS},
+ Pages = {479--493},
+ Pdf = {http://www.lam-mpi.org/papers/lacsi2003/lacsi-2003.pdf},
+ Timestamp = {20071115},
+ Title = {{The LAM/MPI Checkpoint/Restart Framework: System-Initiated Checkpointing}},
+ Url = {http://www.lam-mpi.org/papers/lacsi2003/lacsi-2003.pdf},
+ Volume = {19},
+ Year = {2005},
+ Bdsk-Url-1 = {http://www.lam-mpi.org/papers/lacsi2003/lacsi-2003.pdf}}
+
+ at inproceedings{schroeder,
+ Author = {B. Schroeder and G. Gibson},
+ Booktitle = {Proceedings of the International Conference on Dependable Systems and Networks (DSN2006)},
+ Date-Modified = {2008-06-16 10:35:19 +0200},
+ Owner = {MCS},
+ Pdf = {http://www.pdl.cmu.edu/PDL-FTP/stray/dsn06.pdf},
+ Timestamp = {20071029},
+ Title = {{A Large-Scale Study of Failures in High-Performance Computing Systems}},
+ Url = {http://www.pdl.cmu.edu/PDL-FTP/stray/dsn06.pdf},
+ Year = {2006},
+ Bdsk-Url-1 = {http://www.pdl.cmu.edu/PDL-FTP/stray/dsn06.pdf}}
+
+ at misc{ompi-bof-07,
+ Abstract = {State of the Union for Open MPI.},
+ Author = {Jeffrey M. Squyres and Rainer Keller},
+ Howpublished = {Presented at International Supercomputing Conference (ISC), June 2007, Dresden, Germany.},
+ Month = jun,
+ Owner = {MCS},
+ Pdf = {http://www.open-mpi.org/papers/isc-2007/open-mpi-bof-isc-2007.pdf},
+ Timestamp = {20071021},
+ Title = {Open {MPI} Community Meeting {BOF}},
+ Url = {http://www.open-mpi.org/papers/isc-2007/},
+ Year = {2007},
+ Bdsk-Url-1 = {http://www.open-mpi.org/papers/isc-2007/}}
+
+ at book{Stevens94,
+ Author = {W. Richard Stevens},
+ Owner = {MCS},
+ Publisher = {Addison-Wesley},
+ Timestamp = {20070916},
+ Title = {TCP/IP Illustrated, Volume 1},
+ Year = {1994}}
+
+ at book{Tanenbaum07,
+ Author = {A. Tanenbaum and M. van Steen},
+ Date-Modified = {2008-05-27 09:08:17 +0200},
+ Edition = {Second},
+ Owner = {MCS},
+ Publisher = {Pearson Prentice Hall},
+ Timestamp = {20071207},
+ Title = {Distributed Systems -- Principles and Paradigms},
+ Year = {2007}}
+
+ at misc{treaster04,
+ Author = {Michael Treaster},
+ Owner = {MCS},
+ Pdf = {http://arxiv.org/pdf/cs/0501002},
+ Timestamp = {20071208},
+ Title = {A Survey of Fault-Tolerance and Fault-Recovery Techniques in Parallel Systems},
+ Url = {http://arxiv.org/abs/cs/0501002},
+ Year = {2004},
+ Bdsk-Url-1 = {http://arxiv.org/abs/cs/0501002}}
+
+ at misc{BLCR,
+ Date-Modified = {2008-05-30 13:08:12 +0200},
+ Howpublished = {\url{http://ftg.lbl.gov/CheckpointRestart/CheckpointRestart.shtml}},
+ Key = {BLCR},
+ Owner = {MCS},
+ Publisher = {Berkeley Fault Tolerance Group},
+ Timestamp = {20071116},
+ Title = {{Berkeley Lab Checkpoint/Restart (BLCR)}},
+ Url = {http://ftg.lbl.gov/CheckpointRestart/CheckpointRestart.shtml},
+ Year = {2008},
+ Bdsk-Url-1 = {http://ftg.lbl.gov/CheckpointRestart/CheckpointRestart.shtml}}
+
+ at book{MPI2,
+ Key = {MPI2},
+ Owner = {MCS},
+ Pdf = {http://www.mpi-forum.org/docs/mpi2-report.pdf},
+ Publisher = {Message Passing Interface Forum},
+ Timestamp = {20070904},
+ Title = {MPI-2: Extensions to the Message-Passing Interface},
+ Url = {http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html},
+ Year = {1997},
+ Bdsk-Url-1 = {http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html}}
+
+ at book{MPI1,
+ Key = {MPI1},
+ Owner = {MCS},
+ Pdf = {http://www.mpi-forum.org/docs/mpi1-report.pdf},
+ Publisher = {Message Passing Interface Forum},
+ Timestamp = {20070904},
+ Title = {MPI: A Message-Passing Interface Standard},
+ Url = {http://www.mpi-forum.org/docs/mpi-11-html/mpi-report.html},
+ Year = {1995},
+ Bdsk-Url-1 = {http://www.mpi-forum.org/docs/mpi-11-html/mpi-report.html}}
+
+ at article{226647,
+ Address = {New York, NY, USA},
+ Author = {T. Chandra and S. Toueg},
+ Date-Added = {2007-12-13 20:32:42 +0100},
+ Date-Modified = {2008-06-27 20:40:16 +0200},
+ Doi = {10.1145/226643.226647},
+ Issn = {0004-5411},
+ Journal = {Journal of the ACM},
+ Number = {2},
+ Pages = {225--267},
+ Publisher = {ACM},
+ Title = {{Unreliable Failure Detectors for Reliable Distributed Systems}},
+ Volume = {43},
+ Year = {1996},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/226643.226647}}
+
+ at misc{Andreozzi:2007zl,
+ Author = {S. Andreozzi and F. Ehm and L. Field and B. K{\'o}nya},
+ Date-Added = {2007-12-12 17:05:37 +0100},
+ Date-Modified = {2008-06-17 21:22:07 +0200},
+ Howpublished = {\url{https://forge.gridforum.org/sf/projects/glue-wg}},
+ Keywords = {information},
+ Title = {{GLUE Specification}},
+ Url = {https://forge.gridforum.org/sf/projects/glue-wg},
+ Year = {2007},
+ Bdsk-Url-1 = {https://forge.gridforum.org/sf/projects/glue-wg}}
+
+ at misc{Distributed-Management-Task-Force-Inc.-:2007rz,
+ Author = {{Distributed Management Task Force, Inc. }},
+ Date-Added = {2007-12-12 17:00:27 +0100},
+ Date-Modified = {2008-05-20 09:07:38 +0200},
+ Howpublished = {DMTF Web Page},
+ Keywords = {information},
+ Title = {{Common Information Model (CIM) Standard}},
+ Url = {http://www.dmtf.org/standards/cim/},
+ Year = {2007},
+ Bdsk-Url-1 = {http://www.dmtf.org/standards/cim/}}
+
+ at misc{Lowekamp:2004xy,
+ Author = {Bruce Lowekamp and Brian Tierney and Les Cottrell and Richard Hughes-Jones and Thilo Kielmann and Martin Swany},
+ Date-Added = {2007-12-12 16:35:45 +0100},
+ Date-Modified = {2008-05-20 09:12:25 +0200},
+ Keywords = {information, network},
+ Title = {{A Hierarchy of Network Performance Characteristics for Grid Applications and Services}},
+ Url = {http://www.ogf.org/documents/GFD.23.pdf},
+ Year = {2004},
+ Bdsk-Url-1 = {http://www.ogf.org/documents/GFD.23.pdf}}
+
+ at article{10.1109/E-SCIENCE.2006.81,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Ralf Ratering and Alexander Lukichev and Morris Riedel and Daniel Mallmann and A. Vanni and C. Cacciari and S. Lanzarini and K. Benedyczak and M. Borcz and R. Kluszcynski and Piotr Bala and Gert Ohme},
+ Date-Added = {2007-12-10 15:26:52 +0100},
+ Date-Modified = {2007-12-10 15:26:52 +0100},
+ Doi = {10.1109/E-SCIENCE.2006.81},
+ Isbn = {0-7695-2734-5},
+ Journal = {e-science},
+ Pages = {45},
+ Publisher = {IEEE Computer Society},
+ Title = {{GridBeans}: Support {e-Science} and {Grid} Applications},
+ Volume = {0},
+ Year = {2006},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/E-SCIENCE.2006.81}}
+
+ at article{10.1109/AICT-ICIW.2006.193,
+ Address = {Los Alamitos, CA, USA},
+ Author = {Roger Menday},
+ Date-Added = {2007-12-10 15:21:07 +0100},
+ Date-Modified = {2007-12-10 15:21:07 +0100},
+ Doi = {10.1109/AICT-ICIW.2006.193},
+ Isbn = {0-7695-2522-9},
+ Journal = {aict-iciw},
+ Pages = {134},
+ Publisher = {IEEE Computer Society},
+ Title = {The Web Services Architecture and the {UNICORE} Gateway},
+ Volume = {0},
+ Year = {2006},
+ Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/AICT-ICIW.2006.193}}
+
+ at misc{gin,
+ Date-Added = {2007-12-08 22:51:32 +0100},
+ Date-Modified = {2008-06-14 20:43:47 +0200},
+ Howpublished = {\url{http://forge.ogf.org/sf/projects/gin}},
+ Keywords = {GIN},
+ Title = {{Grid Interoperability Now (GIN) Community Group}},
+ Year = {2008},
+ Bdsk-Url-1 = {http://forge.ogf.org/sf/projects/gin}}
+
+ at misc{Foster:2007kx,
+ Howpublished = {Open Grid Forum Document GFD.108},
+ Author = {I. Foster and A. Grimshaw and P. Lane and W. Lee and M. Morgan and S. Newhouse and S. Pickles and D. Pulsipher and C. Smith and M. Theimer},
+ Date-Added = {2007-12-08 19:03:15 +0100},
+ Date-Modified = {2008-06-30 19:47:54 +0200},
+ Keywords = {Grid Computing, OGSA, Execution, OGSA BES},
+ Title = {{OGSA Basic Execution Service -- Version 1.0}},
+ Url = {http://www.ogf.org/documents/GFD.108.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://www.ogf.org/documents/GFD.108.pdf}}
+
+ at article{JZCSW03,
+ Abstract = {Grid computing emerges as effective technologies to couple geographically distributed resources and solve large-scale computational problems in wide area networked. The fault tolerance is a significant and complex issue in grid computing systems. Various techniques have been investigated to detect and correct faults in distributed computing systems. Unreliable fault detection is one of the most effective techniques. Globus as a grid middleware manages resources in a wide area network. The Globus fault detection service uses the well-known techniques based on unreliable fault detectors to detect and report component failures. However, more powerful techniques are required to detect and correct both system-level and application-level faults in a grid system, and a convenient toolkit is also needed to maintain the consistency in the grid. A fault-tolerant grid platform (FTGP) based on an unreliable fault detector and the Globus fault detection service is presented
+ in this paper. The platform offers effective strategies in such three aspects as grid key components, user tasks, and high-level applications.},
+ Author = {H. Jin and D. Zou and H. Chen and J. Sun and S. Wu},
+ Date-Modified = {2008-06-20 13:34:47 +0200},
+ Issn = {1000-9000},
+ Journal = {Journal of Computer Science and Technology},
+ Month = jul,
+ Number = {4},
+ Pages = {423--433},
+ Title = {{Fault-Tolerant Grid Architecture and Practice}},
+ Volume = {18},
+ Year = {2003}}
+
+ at inproceedings{IACCKR05,
+ Abstract = {A grid consists of high-end computational, storage, and network resources that, while known a priori, are dynamic with respect to activity and availability. Efficient scheduling of requests to use grid resources must adapt to this dynamic environment while meeting administrative policies. In this paper, we describe a framework called SPHINX that can administrate grid policies, and schedule complex and data intensive scientific applications. We present experimental results for several scheduling strategies that effectively utilize the monitoring and job-tracking information provided by SPHINX. These results demonstrate that SPHINX can effectively schedule work across a large number of distributed clusters that are owned by multiple units in a virtual organization in a fault-tolerant way in spite of the highly dynamic nature of the grid and complex policy issues. The novelty lies in use of effective monitoring of resources and job execution tracking in making scheduling decisions and fault-tolerance - something that is missed in today's grid environments. },
+ Address = {Denver, Colorado},
+ Author = {Jang-uk In and Paul Avery and Richard Cavanaugh and Laukik Chitnis and Mandar Kulkarni and Sanjay Ranka},
+ Booktitle = {Proceedings of the 19th IEEE International Parallel and Distributed Processing Symposium (IPDPS'05)},
+ Location = {Los Alamitos},
+ Month = {3-8 April},
+ Pages = {12b},
+ Publisher = {IEEE Computer Society},
+ Title = {{SPHINX}: A Fault-Tolerant System for Scheduling in Dynamic Grid Environments},
+ Year = {2005}}
+
+ at inproceedings{FBD01,
+ Abstract = {Initial versions of MPI were designed to work efficiently on multi-processors which had very little job control and thus static process models. Subsequently forcing them to support a dynamic process model suitable for use on clusters or distributed systems would have reduced their performance. As current HPC collaborative applications increase in size and distribution the potential levels of node and network failures increase the need arises for new fault tolerant systems to be developed. Here we present a new implementation of MPI called FT-MPI that allows the semantics and associated modes of failures to be explicitly controlled by an application via a modified MPI API. Given is an overview of the FT-MPI semantics, design, example applications and some performance issues such as efficient group communications and complex data handling.},
+ Address = {Berlin, Heidelberg},
+ Author = {Graham E. Fagg and Antonin Bukovsky and Jack Dongarra},
+ Booktitle = {International Conference on Computational Science (ICCS 2001)},
+ Pages = {355--366},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Fault Tolerant {MPI} for the {HARNESS} Meta-computing System},
+ Volume = {2073},
+ Year = {2001}}
+
+ at inproceedings{Hwang:2003gf,
+ Abstract = {The generic, heterogeneous, and dynamic nature of the grid requires a new from of failure recovery mechanism to address its unique requirements such as support for diverse failure handling strategies, separation of failure handling strategies from application codes, and user-defined exception handling. We here propose a grid workflow system (grid-WFS), a flexible failure handling framework for the grid, which addresses these grid-unique failure recovery requirements. Central to the framework is flexibility by the use of workflow structure as a high-level recovery policy specification. We show how this use of high-level workflow structure allows users to achieve failure recovery in a variety of ways depending on the requirements and constraints of their applications. We also demonstrate that this use of workflow structure enables users to not only rapidly prototype and investigate failure handling strategies, but also easily change them by simply modifying the encompassing workflow structure, while the application code remains intact. Finally, we present an experimental evaluation of our framework using a simulation, demonstrating the value of supporting multiple failure recovery techniques in grid systems to achieve high performance in the presence of failures.},
+ Author = {Hwang, Soonwook and Kesselman, C.},
+ Citeulike-Article-Id = {837231},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Booktitle = {Proceedings of the 12th IEEE International Symposium on High Performance Distributed Computing},
+ Keywords = {diplomarbeit, grid},
+ Pages = {126--137},
+ Priority = {2},
+ Title = {Grid workflow: a flexible failure handling framework for the grid},
+ Url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1210023},
+ Year = {2003},
+ Bdsk-Url-1 = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1210023}}
+
+ at article{Lee:2002vn,
+ Abstract = {The notion of computational resiliency refers to the ability of a distributed application to tolerate intrusion when under information warfare (IW) attack. This technology seeks an active strengthening of a military mission, rather than protecting its network infrastructure using static defensive measures such as network security, intrusion sensors, and firewalls. Computational resiliency involves the dynamic use of replication, guided by mission policy, to achieve intrusion tolerance so that even undetected attacks do not cause mission failure; however, it goes further to dynamically regenerate replication in response to an IW attack, allowing the level of system assurance to be restored and maintained. Replicated structures are protected through several techniques such as camouflage, dispersion, and layered security policy. This paper describes a prototype concurrent programming technology that we have developed to support computational resiliency and
+describes how the library has been applied in two prototypical applications. Copyright {\copyright} 2002 John Wiley \& Sons, Ltd.},
+ Author = {J. Lee and S. J. Chapin and S. Taylor},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:34:25 +0200},
+ Journal = {Quality and Reliability Engineering International},
+ Keywords = {Fault Tolerance},
+ Month = {May/June},
+ Number = {3},
+ Pages = {185--199},
+ Title = {{Computational Resiliency}},
+ Volume = {18},
+ Year = {2002}}
+
+ at inproceedings{TG07,
+ Abstract = {MPI implementations that support the highest level of thread safety for user programs, MPI_THREAD_MULTIPLE, are becoming widely available. Users often expect that different threads can execute independently and that the MPI implementation can provide the necessary level of thread safety with only a small overhead. The MPI Standard, however, requires only that no MPI call in one thread block MPI calls in other threads; it makes no performance guarantees. Therefore, some way of measuring an implementation's performance is needed. In this paper, we propose a number of performance tests that are motivated by typical application scenarios. These tests cover the overhead of providing the MPI_THREAD_MULTIPLE level of thread safety for user programs, the amout of concurrency in different threads making MPI calls, the ability to overlap communication with computation, and other features. We present performance results with this test suite on several platforms (Linux cluster, Sun and IBM SMPs) and MPI implementations (MPICH2, Open MPI, IBM, and Sun).},
+ Address = {Berlin, Heidelberg},
+ Author = {R. Thakur and W. Gropp},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {MPI},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {46--55},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Test Suite for Evaluating Performance of {MPI} Implementations That Support {MPI\_THREAD\_MULTIPLE}},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{SR07,
+ Abstract = {Writing applications capable of executing efficiently in Grids is extremely difficult and tedious for inexperienced users. The distributed resources are typically heterogeneous, non-dedicated, and are offered without any performance or availability guarantees. Systems capable of adapting the execution of an application to the dynamic characteristics of the Grid are essential. This work describes the strategy used to bestow the self-healing property on autonomic EasyGrid MPI applications to withstand process and resource failures. This paper highlights both the difficulties and the low cost solution adopted to offer fault tolerance in applications based on the standard Grid installation of LAM/MPI.},
+ Address = {Berlin, Heidelberg},
+ Author = {J.A. da Silva and V.E.F. Rebello},
+ Booktitle = {Proceedings of the 14th EuroPVM/MPI},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-04-07 22:19:24 -0400},
+ Isbn = {3-540-75415-6},
+ Keywords = {Fault Tolerance},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {144--152},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {{Low Cost Self-healing in MPI Applications}},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{CGIKS07,
+ Abstract = {Parallelism in file systems is obtained by using several independent server nodes supporting one or more secondary storage devices. This approach increases the performance and scalability of the system, but a fault in one single node can make the whole system fail. In order to avoid this problem, data must be stored using some kind of redundant technique, so that it can be recovered in case of failure. Fault tolerance can be provided in I/O systems by using replication or RAID based schemes. However, most of the current systems apply the same technique of fault tolerant at disk or file system level.
+This paper describes how fault tolerance support can be used by MPI applications based on PVFS version 2, as well-know parallel file system for clusters. This support can be applied to other parallel file systems with many benefits: fault tolerance at file level, flexible definition of new fault tolerance scheme, and dynamic reconfiguration of the fault tolerance policy.},
+ Address = {Berlin, Heidelberg},
+ Author = {A. Calderon and F. Garcia-Carballeira and F. Isaila and R. Keller and A. Schulz},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {Fault Tolerance},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {153--160},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Fault Tolerant File Models for {MPI-IO} Parallel File Systems},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{TDAN07,
+ Abstract = {MPI applications may waste thousands of CPU cycles if they do not efficiently overlap communications and computation. In this paper, we present a generic and portable I/O manager that is able to make communication progress asynchronously using tasklets. It chooses automatically the most appropriate communication method, depending on the context: multi-threaded application or not, SMP machine or not. We have implemented and evaluated our I/O manager with Mad-MPI, our own MPI implementation, and compared it to other existing MPI implementations regarding the ability to efficiently overlap communication and computation.},
+ Address = {Berlin, Heidelberg},
+ Author = {F. Trahay and A. Denis and O. Aumage and R. Namyst},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {MPI},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {170-177},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Improving Reactivity and Communication Overlap in MPI Using a Generic I/O Manager},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{SBBSB07,
+ Abstract = {The default messaging model for the OpenFabrics ``Verbs'' API is to consume receive buffers in order -- regardless of the actual incoming message size -- leading to inefficient registered memory usage. For example, many small messages can consume large amounts of registered memory. This paper introduces a new transport protocol in Open MPI implemented using the existing OpenFabrics Verbs API that exhibits efficient registered memory utilization. Several real-world applications were run at scale with the new protocol; results show that global network resource utilization efficiency increases, allowing increased scalability -- and larger problem sizes -- on clusters which can increase application performance in some cases.},
+ Address = {Berlin, Heidelberg},
+ Author = {G.M. Shipman and R. Brightwell and B. Barrett and J.M. Squyres and G. Bloch},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {InfiniBand},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {178-186},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Investigations on InfiniBand: Efficient Network Buffer Utilization at Scale},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{LSGHGH07,
+ Abstract = {Recently, Multicluster environments have become more important in the high-performance computing world. However, less attention has been paid to non-dedicated Multiclusters. We are developing MetaLoRaS, an efficient two-level MetaScheduler for non-dedicated environments, which assigns PVM and MPI applications according to an estimation of the turnaround time in each particular cluster.
+The main MetaScheduler goal is to minimize the average job turnaround time in a non-dedicated environment. The efficiency of MetaLoRaS depends on the prediction accuracy of the system and its ability to take decisions according to changes in local workload.
+In this paper we present different Metascheduling techniques that take the dynamics of the local workload into account and compare their effects on system performance. We evaluate the prediction accuracy in relation to the low-level queues sizes. Finally, we analyze the relationship between prediction accuracy and system performance.},
+ Address = {Berlin, Heidelberg},
+ Author = {J.Ll. Lerida and F. Solsona and F. Gine and M. Hanzich and J.R. Garcia and P. Hernandez},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {Grid},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {195-203},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {MetaLoRaS: A Re-scheduling and Prediction MetaScheduler for Non-dedicated Multiclusters},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{PTIW07,
+ Abstract = {Many existing clusters use inexpensive Gigabit Ethernet and often have multiple interface cards to improve bandwidth and enhance fault tolerance. We investigate the use of Concurrent Multipath Transfer (CMT), an extension to the Stream Control Transmission Protocol (SCTP), to take advantage of multiple network interfaces for use with MPI programs. We evaluate the performance of our system with microbenchmarks and MPI collective routines. We also compare our method, which employs CMT at the transport layer in the operating system kernel, to existing systems that support multi-railing in the middleware. We discuss performance with respect to bandwidth, latency, congestion control and fault tolerance.},
+ Address = {Berlin, Heidelberg},
+ Author = {B. Penoff and M. Tsai and J. Iyengar and A. Wagner},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {Fault Tolerance},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {204-212},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Using CMT in SCTP-Based MPI to Exploit Multiple Interfaces in Cluster Nodes},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{BSL07,
+ Abstract = {The Message Passing Interface provides an interface for one-sided communication as part of the MPI-2 standard. The semantics specified by MPI-2 allow for a number of different implementation avenues, each with different performance characteristics. Within the context of Open MPI, a freely available high performance MPI implementation, we analyze a number of implementation possibilities, including layering over MPI-1 send/receive and true remote memory access.},
+ Address = {Berlin, Heidelberg},
+ Author = {B.W. Barrett and G.M. Shipman and A. Lumsdaine},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {OSC},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {242-250},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Analysis of Implementation Options for MPI-2 One-Sided},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{SNMP07,
+ Abstract = {MPI-2's One-sided communication interface is being explored in scientific applications. One of the important operations in a one sided model is read-modify-write. MPI-2 semantics provide MPI_Put, MPI_Get and MPI_Accumulate operations which can be used to implement read-modify-write functionality. The different strategies yield varying performance benefits depending on the underlying one-sided implementation. We use HPCC Random Access benchmark which primarily uses read-modify-write operations as a case study for evaluating the different implementation strategies in this paper. Currently this benchmark is implemented based on MPI two-sided semantics. In this work we design and evaluate MPI-2 versions of the HPCC Random Access benchmark using one-sided operations. To improve the performance, we explore two different optimizations: (i) software based aggregation and (ii) hardware-based atomic operations. We evaluate our different approaches on an InfiniBand cluster. The software based aggregation outperforms the basic one sided scheme without aggregation by a factor of 4.38. The hardware based scheme shows an improvement by a factor of 2.62 as compared to the basic one sided scheme.},
+ Address = {Berlin, Heidelberg},
+ Author = {G. Santhanaraman and S. Narravula and A.R. Mamidala and D.K. Panda},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {OSC},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {251-259},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {MPI-2 One-Sided Usage and Implementation for Read Modify Write Operations: A Case Study with HPCC},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{SGLW07,
+ Abstract = {The SiCortex cluster systems implement a high-bandwidth, low-latency interconnect. We describe how the SiCortex systems implement RDMA, including zero-copy data transfers and user-level networking. The system uses optimistic virtual memory registration without page locking. Finally, we provide preliminary performance results.},
+ Address = {Berlin, Heidelberg},
+ Author = {L.C. Stewart and D. Gingold and J. Leonard and P. Watkins},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {OSC},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {260-271},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {RDMA in the SiCortex Cluster Systems},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{GT07,
+ Abstract = {The MPI remote-memory access (RMA) operations provide a different programming model from the regular MPI-1 point-to-point operations. This model is particularly appropriate for cases where there are multiple communication events for each synchronization and where the target memory locations are known by the source processes. In this paper, we describe a benchmark designed to illustrate the performance of RMA with multiple RMA operations for each synchronization, as compared with point-to-point communication. We measured the performance of this benchmark on several platforms (SGI Altix, Sun Fire, IBM SMP, Linux cluster) and MPI implementations (SGI, Sun, IBM, MPICH2, Open MPI). We also investigated the effectiveness of the various optimization options specified by the MPI standard. Our results show that MPI RMA can provide substantially higher performance than point-to-point communication on some platforms, such as SGI Altix and Sun Fire. The results also show that many opportunities still exist for performance improvements in the implementation of MPI RMA.},
+ Address = {Berlin, Heidelberg},
+ Author = {W.D. Gropp and R. Thakur},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {OSC},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {272-280},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Revealing the Performance of MPI RMA Implementations},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{Traff:2007fk,
+ Abstract = {The MPI Standard does not make any performance guarantees, but users expect (and like) MPI implementations to deliver good performance. A common-sense expectation of performance is that an MPI function should perform no worse than a combination of other MPI functions that can implement the same functionality. In this paper, we formulate some performance requirements and conditions that good MPI implementations can be expected to fulfill by relating aspects of the MPI standard to each other. Such a performance formulation could be used by benchmarks and tools, such as SKaMPI and Perfbase, to automatically verify whether a given MPI implementation fulfills basic performance requirements. We present examples where some of these requirements are not satisfied, demonstrating that there remains room for improvement in MPI implementations.},
+ Address = {Berlin, Heidelberg},
+ Author = {J. L. Tr{\"a}ff and W. Gropp and R. Thakur},
+ Booktitle = {Proceedings of the 14th European PVM/MPI Users' Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Isbn = {3-540-75415-6},
+ Keywords = {MPI},
+ Location = {Paris, France},
+ Month = {September/October},
+ Pages = {36-45},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Self-consistent MPI Performance Requirements},
+ Volume = {4757},
+ Year = {2007}}
+
+ at inproceedings{drmaa07,
+ Address = {{Rio de Janeiro, Brazil}},
+ Author = {P. Tr{\"o}ger and H. Rajic and A. Haas and P. Domagalski},
+ Booktitle = {{Proceedings of the Seventh IEEE International Symposium on Cluster Computing and the Grid (CCGrid 2007)}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-22 18:12:57 +0200},
+ Doi = {{10.1109/CCGRID.2007.109}},
+ Isbn = {{0-7695-2833-3}},
+ Month = {{May 14-17}},
+ Pages = {{619--626}},
+ Title = {{{Standardization of an API for Distributed Resource Management Systems}}},
+ Year = {{2007}},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/CCGRID.2007.109}}
+
+ at inproceedings{SAGA_Jha07b,
+ Author = {Jha, S. and Kaiser, H. and El Khamra, Y. and Weidner, O.},
+ Booktitle = {Accepted for 3rd IEEE Conference on eScience2007 and Grid Computing, Bangalore, India.},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-25 13:46:32 +0200},
+ Title = {Design and Implementation of Network Performance Aware Applications Using SAGA and Cactus},
+ Url = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf},
+ Year = {2007},
+ Bdsk-Url-1 = {http://saga.cct.lsu.edu/publications/saga_cactus_escience.pdf}}
+
+ at article{SAGA_Goodale06a,
+ Author = {Goodale, T. and Jha, S. and Kaiser, H. and Kielmann, T. and Kleijer, P. and von Laszewski, G. and Lee, C. and Merzky, A. and Rajic, H. and Shalf, J.},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-25 13:47:31 +0200},
+ Journal = {Computational Methods in Science and Technology},
+ Number = {1},
+ Pages = {7-20},
+ Title = {{SAGA: A Simple API for Grid Applications, High-Level Application Programming on the Grid}},
+ Url = {http://saga.cct.lsu.edu/publications/saga_paper-a_simple_api_for_grid_applications_sc05.pdf},
+ Volume = {12},
+ Year = {2006},
+ Bdsk-Url-1 = {http://saga.cct.lsu.edu/publications/saga_paper-a_simple_api_for_grid_applications_sc05.pdf}}
+
+ at article{Cactus_Allen01a,
+ Author = {G. Allen and D. Angulo and I. Foster and G. Lanfermann and C. Liu and T. Radke and E. Seidel and J. Shalf},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Journal = {International Journal of High Performance Computing Applications},
+ Keyword = {computerscience, general},
+ Number = {4},
+ Title = {{The Cactus Worm: Experiments with Dynamic Resource Discovery and Allocation in a Grid Environment}},
+ Url = {http://hpc.sagepub.com/cgi/content/short/15/4/345},
+ Urlpdf = {http://www.cactuscode.org/Articles/Cactus_Allen01a.pre.pdf},
+ Volume = {15},
+ Year = {2001},
+ Bdsk-Url-1 = {http://hpc.sagepub.com/cgi/content/short/15/4/345}}
+
+ at techreport{Cactus_Talbot00a,
+ Author = {B. Talbot and S. Zhou and G. Higgins},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Institution = {Software Engineering Support of the Third Round of Scientific Grand Challenge Investigations},
+ Keyword = {cactuscode, general},
+ Title = {{Review of the Cactus Framework}},
+ Type = {Task 4 Report: Earth System Modeling Framework Survey},
+ Url = {http://ct.gsfc.nasa.gov/esmf_tasc/Files/Cactus_b.html},
+ Year = {2000},
+ Bdsk-Url-1 = {http://ct.gsfc.nasa.gov/esmf_tasc/Files/Cactus_b.html}}
+
+ at inproceedings{Cactus_Allen01e,
+ Address = {Denver, USA},
+ Author = {G. Allen and T. Dramlitsch and I. Foster and N. Karonis and M. Ripeanu and E. Seidel and B. Toonen},
+ Booktitle = {Proceedings of Supercomputing 2001},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-19 09:12:23 +0200},
+ Keyword = {cactuscode, computerscience, general},
+ Title = {{Supporting Efficient Execution in Heterogeneous Distributed Computing Environments with Cactus and Globus}},
+ Url = {http://portal.acm.org/citation.cfm?coll=GUIDE&dl=GUIDE&id=582086},
+ Urlpdf = {http://www.cactuscode.org/Articles/Cactus_Allen01e.pre.pdf},
+ Year = {2001},
+ Bdsk-Url-1 = {http://portal.acm.org/citation.cfm?coll=GUIDE&dl=GUIDE&id=582086}}
+
+ at inproceedings{Cactus_Goodale03a,
+ Author = {T. Goodale and G. Allen and G. Lanfermann and J. Mass{\'{o}} and T. Radke and E. Seidel and J. Shalf},
+ Booktitle = {Vector and Parallel Processing - VECPAR '2002, 5th International Conference},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Keyword = {cactuscode, general},
+ Publisher = {Springer},
+ Title = {{The Cactus Framework and Toolkit: Design and Applications}},
+ Url = {http://www.springerlink.com/content/2fapcbeyyc1xg0mm/},
+ Urlpdf = {http://www.cactuscode.org/Articles/Cactus_Goodale03a.pre.pdf},
+ Year = {2003},
+ Bdsk-Url-1 = {http://www.springerlink.com/content/2fapcbeyyc1xg0mm/}}
+
+ at techreport{Cactus_Talbot01a,
+ Author = {B. Talbot and S. Zhou and G. Higgins},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Institution = {NASA},
+ Keyword = {cactuscode, general},
+ Month = {June},
+ Title = {{Software Engineering Support of the Third Round of Scientific Grand Challenge Investigations}},
+ Type = {An Earth Modelling System Software Framework Strawman Design that Integrates Cactus and UCLA/UCB Distributed Data Broker - Task 5 Final Report},
+ Url = {http://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/20020068943_2002111113.pdf},
+ Year = {2002},
+ Bdsk-Url-1 = {http://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/20020068943_2002111113.pdf}}
+
+ at article{944148,
+ Address = {Beijing, China},
+ Author = {H. Jin and D. Zou and H. Chen and J. Sun and S. Wu},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-20 13:33:30 +0200},
+ Issn = {1000-9000},
+ Journal = {Journal of Computer Science and Technology},
+ Number = {4},
+ Pages = {423--433},
+ Publisher = {Institute of Computing Technology},
+ Title = {{Fault-Tolerant Grid Architecture and Practice}},
+ Volume = {18},
+ Year = {2003}}
+
+ at misc{dgrid,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-03-28 22:35:05 +0100},
+ Howpublished = {{\url{http://www.d-grid.de/}}},
+ Key = {D-Grid},
+ Title = {{{D-Grid -- German Grid Initiative}}},
+ Year = {{2008}}}
+
+ at inproceedings{Zhang04,
+ Address = {Washington, DC, USA},
+ Author = {X. Zhang and D. Zagorodnov and M. Hiltunen and K. Marzullo and R. Schlichting},
+ Booktitle = {CLUSTER '04: Proceedings of the 2004 IEEE International Conference on Cluster Computing},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:38:58 +0200},
+ Isbn = {0-7803-8694-9},
+ Pages = {105--114},
+ Publisher = {IEEE Computer Society},
+ Title = {{Fault-tolerant Grid Services using Primary-Backup: Feasibility and Performance}},
+ Year = {2004}}
+
+ at article{78972,
+ Address = {New York, NY, USA},
+ Author = {Maurice P. Herlihy and Jeannette M. Wing},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Doi = {http://doi.acm.org/10.1145/78969.78972},
+ Issn = {0164-0925},
+ Journal = {ACM Trans. Program. Lang. Syst.},
+ Number = {3},
+ Pages = {463--492},
+ Publisher = {ACM},
+ Title = {{Linearizability: A Correctness Condition for Concurrent Objects}},
+ Volume = {12},
+ Year = {1990},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/78969.78972}}
+
+ at article{DBLP:journals/tc/Lamport79,
+ Author = {L. Lamport},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-28 11:58:35 +0200},
+ Journal = {IEEE Transactions on Computers},
+ Number = {9},
+ Pages = {690-691},
+ Title = {{How to Make a Multiprocessor Computer That Correctly Executes Multiprocess Programs}},
+ Volume = {28},
+ Year = {1979}}
+
+ at article{1115721,
+ Address = {Los Alamitos, CA, USA},
+ Author = {K. P. Birman},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:30:31 +0200},
+ Doi = {http://dx.doi.org/10.1109/MC.2006.73},
+ Issn = {0018-9162},
+ Journal = {Computer},
+ Number = {2},
+ Pages = {98},
+ Publisher = {IEEE Computer Society Press},
+ Title = {{The Untrustworthy Web Services Revolution}},
+ Volume = {39},
+ Year = {2006},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1109/MC.2006.73}}
+
+ at inproceedings{DBLP:conf/sofsem/Schiper06,
+ Author = {A. Schiper},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {SOFSEM},
+ Crossref = {DBLP:conf/sofsem/2006},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-27 22:58:21 +0200},
+ Ee = {http://dx.doi.org/10.1007/11611257_10},
+ Pages = {117-136},
+ Title = {{Group Communication: From Practice to Theory}},
+ Year = {2006}}
+
+ at proceedings{DBLP:conf/sofsem/2006,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {SOFSEM},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Editor = {Jir\'{\i} Wiedermann and Gerard Tel and Jaroslav Pokorn{\'y} and M{\'a}ria Bielikov{\'a} and Julius Stuller},
+ Isbn = {3-540-31198-X},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {SOFSEM 2006: Theory and Practice of Computer Science, 32nd Conference on Current Trends in Theory and Practice of Computer Science, Mer\'{\i}n, Czech Republic, January 21-27, 2006, Proceedings},
+ Volume = {3831},
+ Year = {2006}}
+
+ at article{birman99bimodal,
+ Author = {K. P. Birman and M. Hayden and O. Ozkasap and Z. Xiao and M. Budiu and Y. Minsky},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:30:21 +0200},
+ Journal = {ACM Transactions on Computer Systems},
+ Keywords = {Group Communication, Fault Tolerance},
+ Number = {2},
+ Pages = {41--88},
+ Title = {{Bimodal multicast}},
+ Url = {citeseer.ist.psu.edu/article/birman99bimodal.html},
+ Volume = {17},
+ Year = {1999},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/article/birman99bimodal.html}}
+
+ at inproceedings{jacobson88congestion,
+ Address = {Stanford, CA},
+ Author = {Van Jacobson},
+ Booktitle = {{ACM} {SIGCOMM} '88},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Month = aug,
+ Pages = {314-329},
+ Title = {{Congestion Avoidance and Control}},
+ Url = {citeseer.ist.psu.edu/jacobson88congestion.html},
+ Year = {1988},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/jacobson88congestion.html}}
+
+ at article{1029434,
+ Address = {New York, NY, USA},
+ Author = {Eduardo Huedo and Ruben S. Montero and Ignacio M. Llorente},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:12 +0100},
+ Doi = {http://dx.doi.org/10.1002/spe.584},
+ Issn = {0038-0644},
+ Journal = {Softw. Pract. Exper.},
+ Number = {7},
+ Pages = {631--651},
+ Publisher = {John Wiley \& Sons, Inc.},
+ Title = {{A Framework for Adaptive Execution in Grids}},
+ Volume = {34},
+ Year = {2004},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1002/spe.584}}
+
+ at article{jackson01core,
+ Author = {David Jackson and Quinn Snell and Mark Clement},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Journal = {Lecture Notes in Computer Science},
+ Pages = {87},
+ Title = {{Core Algorithms of the Maui Scheduler}},
+ Volume = {2221},
+ Year = {2001}}
+
+ at inproceedings{RoeblitzRzadca06,
+ Author = {R{\"o}blitz, Thomas and Rzadca, Krzysztof},
+ Booktitle = {Euro-Par 2006 Proceedings},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Keywords = {WP6},
+ Pages = {198-210},
+ Publisher = {Springer},
+ Title = {{On the Placement of Reservations into Job Schedules}},
+ Volume = {4128},
+ Year = {2006}}
+
+ at article{schnorLuckow08,
+ Address = {Amsterdam, The Netherlands},
+ Author = {A. Luckow and B. Schnor},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:57:29 +0200},
+ Doi = {http://dx.doi.org/10.1016/j.future.2007.03.007},
+ Issn = {0167-739X},
+ Journal = {Future Generation Computer Systems -- The International Journal of Grid Computing: Theory, Methods and Applications},
+ Number = {2},
+ Pages = {142--152},
+ Publisher = {Elsevier Science Publishers B. V.},
+ Title = {{Migol: A Fault-Tolerant Service Framework for MPI Applications in the Grid}},
+ Volume = {24},
+ Year = {2008},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.future.2007.03.007}}
+
+ at misc{bbgrid,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://bb-grid.informatik.tu-cottbus.de/}},
+ Title = {{Berlin-Brandenburg Grid (BB-Grid)}},
+ Year = {2007}}
+
+ at misc{ws-agreement06,
+ Author = {Alain Andrieux and Karl Czajkowski and Asit Dan and Kate Keahey and Heiko Ludwig and Toshiyuki Nakata and Jim Pruyne and John Rofrano and Steve Tuecke and Ming Xu},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{ https://forge.gridforum.org/sf/go/projects.graap-wg/}},
+ Title = {{Web Services Agreement Specification (WS-Agreement)}},
+ Year = {2006}}
+
+ at inproceedings{feller07,
+ Address = {Madison, WI, USA},
+ Author = {M. Feller and I. Foster and S. Martin},
+ Booktitle = {Proceedings of the Teragrid 2007 Conference},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{ http://www.globus.org/alliance/publications/papers/TG07-GRAM-comparison.pdf}},
+ Title = {{GT4 GRAM: A Functionality and Performance Study}},
+ Year = {2007}}
+
+ at inproceedings{DBLP:conf/parco/StreitWWZ05,
+ Author = {A. Streit and O. W{\"a}ldrich and P. Wieder and W. Ziegler},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {PARCO},
+ Crossref = {DBLP:conf/parco/2005},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:32:48 +0200},
+ Pages = {57-64},
+ Title = {{On Scheduling in UNICORE - Extending the Web Services Agreement based Resource Management Framework.}},
+ Year = {2005}}
+
+ at proceedings{DBLP:conf/parco/2005,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {PARCO},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Gerhard R. Joubert and Wolfgang E. Nagel and Frans J. Peters and Oscar G. Plata and P. Tirado and Emilio L. Zapata},
+ Isbn = {3-00-017352-8},
+ Publisher = {Central Institute for Applied Mathematics, J{\"u}lich, Germany},
+ Series = {John von Neumann Institute for Computing Series},
+ Title = {Parallel Computing: Current {\&} Future Issues of High-End Computing, Proceedings of the International Conference ParCo 2005, 13-16 September 2005, Department of Computer Architecture, University of Malaga, Spain},
+ Volume = {33},
+ Year = {2005}}
+
+ at inproceedings{DBLP:conf/iwqos/BurchardD03,
+ Author = {Lars-Olof Burchard and Marc Droste-Franke},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {IWQoS},
+ Crossref = {DBLP:conf/iwqos/2003},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Ee = {http://link.springer.de/link/service/series/0558/bibs/2707/27070215.htm},
+ Pages = {215-230},
+ Title = {Fault Tolerance in Networks with an Advance Reservation Service.},
+ Year = {2003}}
+
+ at proceedings{DBLP:conf/iwqos/2003,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {IWQoS},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Kevin Jeffay and Ion Stoica and Klaus Wehrle},
+ Isbn = {3-540-40281-0},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Quality of Service - IWQoS 2003, 11th International Workshop, Berkeley, CA, USA, June 2-4, 2003, Proceedings},
+ Volume = {2707},
+ Year = {2003}}
+
+ at inproceedings{reservations,
+ Author = {Thomas R{\"o}blitz and Florian Schintke and Jan Wendler},
+ Booktitle = {{Proceedings of the Workshop on Adaptive Grid Middleware (AGridM'04)}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Month = {September},
+ Title = {{Elastic Grid Reservations with User-Defined Optimization Policies}},
+ Year = {2004}}
+
+ at inproceedings{823372,
+ Address = {Washington, DC, USA},
+ Author = {C. Liu and L. Yang and I. Foster and D. Angulo},
+ Booktitle = {HPDC '02: Proceedings of the 11th IEEE International Symposium on High Performance Distributed Computing HPDC-11 2002 (HPDC'02)},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:43:15 +0200},
+ Isbn = {0-7695-1686-6},
+ Pages = {63},
+ Publisher = {IEEE Computer Society},
+ Title = {Design and Evaluation of a Resource Selection Framework for Grid Applications},
+ Year = {2002}}
+
+ at inproceedings{BurchardEtAl-2005-autonomy,
+ Author = {L. Burchard and H.-U. Heiss and B. Linnert and J. Schneider and O. Kao and M. Hovestadt and F. Heine and A. Keller},
+ Booktitle = {Future Generation Grids},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-19 22:50:21 +0200},
+ Editor = {Getov, Vladimir and Laforenza, Domenico and Reinefeld, Alexander},
+ Isbn = {0-387-27935-0},
+ Keywords = {Grid Computing, Quality of Service, VRM, Cluster Computing},
+ Series = {CoreGrid},
+ Title = {{The Virtual Resource Manager: Local Autonomy versus Qo{S} Guarantees for Grid Applications}},
+ Topic = {Grid Computing, Quality of Service, VRM, Cluster Computing},
+ Volume = {2},
+ Year = {2006}}
+
+ at inproceedings{DBLP:conf/europar/RoblitzR06,
+ Author = {Thomas R{\"o}blitz and Krzysztof Rzadca},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Crossref = {DBLP:conf/europar/2006},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Ee = {http://dx.doi.org/10.1007/11823285_21},
+ Pages = {198-210},
+ Title = {On the Placement of Reservations into Job Schedules.},
+ Year = {2006}}
+
+ at proceedings{DBLP:conf/europar/2006,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Wolfgang E. Nagel and Wolfgang V. Walter and Wolfgang Lehner},
+ Isbn = {3-540-37783-2},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Euro-Par 2006, Parallel Processing, 12th International Euro-Par Conference, Dresden, Germany, August 28 - September 1, 2006, Proceedings},
+ Volume = {4128},
+ Year = {2006}}
+
+ at book{976113,
+ Address = {Norwell, MA, USA},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Jarek Nabrzyski and Jennifer M. Schopf and Jan Weglarz},
+ Isbn = {1-4020-7575-8},
+ Publisher = {Kluwer Academic Publishers},
+ Title = {{Grid Resource Management: State of the Art and Future Trends}},
+ Year = {2004}}
+
+ at inproceedings{schopf06,
+ Author = {J. Schopf and L. Pearlman and N. Miller and C. Kesselman and I. Foster and M. D'Arcy and A. Chervenak},
+ Booktitle = {Journal of Physics: Conference Series -- Proceedings of SciDAC},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-25 22:59:18 +0200},
+ Title = {{Monitoring the Grid with the Globus Toolkit MDS4}},
+ Year = {2006}}
+
+ at article{springel-2005-364,
+ Author = {V. Springel},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:36:23 +0200},
+ Journal = {Monthly Notices of the Royal Astronomical Society},
+ Pages = {1105},
+ Title = {{The Cosmological Simulation Code GADGET-2}},
+ Url = {http://www.citebase.org/abstract?id=oai:arXiv.org:astro-ph/0505010},
+ Volume = {364},
+ Year = {2005},
+ Bdsk-Url-1 = {http://www.citebase.org/abstract?id=oai:arXiv.org:astro-ph/0505010}}
+
+ at proceedings{DBLP:conf/ipps/2006,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {IPDPS},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Publisher = {IEEE},
+ Title = {20th International Parallel and Distributed Processing Symposium (IPDPS 2006), Proceedings, 25-29 April 2006, Rhodes Island, Greece},
+ Year = {2006}}
+
+ at book{249065,
+ Address = {Greenwich, CT, USA},
+ Author = {K. P. Birman},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:30:49 +0200},
+ Isbn = {1-884777-29-5},
+ Publisher = {Manning Publications Co.},
+ Title = {{Building Secure and Reliable Network Applications}},
+ Year = {1997}}
+
+ at book{birman05,
+ Address = {New York, USA},
+ Author = {K. P. Birman},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:29:56 +0200},
+ Isbn = {0-387-21509-3},
+ Publisher = {Springer},
+ Title = {{Reliable Distributed Systems -- Technologies, Web Services and Applications}},
+ Year = {2005}}
+
+ at article{schneider90implementing,
+ Author = {F. Schneider},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-19 22:50:30 +0200},
+ Journal = {ACM Computing Surveys},
+ Number = {4},
+ Pages = {299--319},
+ Title = {{Implementing Fault-Tolerant Services Using the State Machine Approach: {A} Tutorial}},
+ Url = {citeseer.ist.psu.edu/schneider90implementing.html},
+ Volume = {22},
+ Year = {1990},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/schneider90implementing.html}}
+
+ at book{Goscinsky91,
+ Author = {Andrzej Goscinski},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Publisher = {Addison-Wesley},
+ Title = {{Distributed Operating Systems: The Logical Design}},
+ Year = {1991}}
+
+ at article{Amir95,
+ Abstract = {Fault-tolerant distributed systems are becoming more
+ important but, in existing systems, maintaining the
+ consistency of replicated data is quite expensive. The
+ Totem single-ring protocol supports consistent
+ concurrent operations by placing a total order on
+ broadcast messages. This total order is derived from
+ the sequence number in a token that circulates around
+ a logical ring imposed on a set of processors in a
+ broadcast domain. The protocol handles reconfiguration
+ of the system when processors fail and restart or the
+ network partitions and remerges. Extended Virtual
+ Synchrony ensures that processors deliver messages and
+ configuration changes to the application in a
+ consistent total order system-wide. An efficient flow
+ control mechanism enables the Totem single-ring
+ protocol to achieve message ordering rates
+ significantly higher than the best prior total
+ ordering protocols},
+ Author = {Y. Amir and L. Moser and P. Melliar-Smith and D. Agarwal and P. Ciarfella},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-27 23:32:18 +0200},
+ Issn = {0734-2071},
+ Journal = {ACM Transactions on Computer Systems},
+ Number = 4,
+ Pages = {311--342},
+ Title = {{The Totem Single-Ring Ordering and Membership Protocol}},
+ Url = {http://www.cs.huji.ac.il/labs/transis/publications.html},
+ Volume = 13,
+ Year = 1995,
+ Bdsk-Url-1 = {http://www.cs.huji.ac.il/labs/transis/publications.html}}
+
+ at techreport{Chandy82,
+ Author = {K. Chandy},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-25 23:15:48 +0200},
+ Institution = {University of Texas},
+ Title = {{A Mutual Exclusion Algorithm for Distributed Systems}},
+ Year = {1982}}
+
+ at misc{UDDI-org,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {http://uddi.org/},
+ Key = {UDDI-org},
+ Title = {{UDDI homepage}}}
+
+ at misc{UDDI-replication,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Luc Cl\'{e}ment},
+ Howpublished = {\url{http://uddi.org/pubs/Replication_v2.pdf}},
+ Key = {Cl\'{e}ment},
+ Title = {{{UDDI Version 2.03 Replication Specification}}},
+ Year = {2002}}
+
+ at misc{globus-ars,
+ Author = {Globus Alliance},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://bugzilla.globus.org/globus/attachment.cgi?id=1108}},
+ Title = {{Globus Toolkit Advance Reservation Architecture}},
+ Year = {2006}}
+
+ at misc{uddi-spec,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Luc Clement and Andrew Hately and Claus von Riegen and Tony Rogers},
+ Howpublished = {{\url{http://www.oasis-open.org/committees/uddi-spec/doc/tcspecs.htm}}},
+ Key = {UDDI-org},
+ Publisher = {{Organization for the Advancement of Structured Information (OASIS)}},
+ Title = {{{UDDI Version 3.0.2 -- UDDI Spec Technical Committee Draft}}},
+ Year = {{2004}}}
+
+ at misc{gt4-deleg,
+ Author = {{Globus Alliance}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://www.globus.org/toolkit/docs/4.0/security/delegation/}},
+ Title = {{GT4 Delegation Service}},
+ Year = {2005}}
+
+ at misc{UDDI-public,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {http://www.uddi.org/register.html},
+ Key = {UDDI-public},
+ Title = {{Public UDDI Registries}}}
+
+ at misc{blagojevic00,
+ Author = {V. Blagojevic},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-28 16:10:45 +0200},
+ Howpublished = {{\url{ http://www.jgroups.org/javagroupsnew/docs/papers/totaltoken.ps.gz}}},
+ Title = {{Implementing Totem's Total Ordering Protocol in JavaGroups Reliable Group Communication Toolkit}},
+ Year = {2000}}
+
+ at misc{ban98design,
+ Author = {B. Ban},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 13:30:24 +0200},
+ Howpublished = {{\url{http://www.jgroups.org/javagroupsnew/docs/papers/Coots.ps.gz }}},
+ Text = {Bela Ban, Design and Implementation of a Reliable Group Communication Toolkit for Java, Cornell University, September 1998.},
+ Title = {{Design and Implementation of a Reliable Group Communication Toolkit for Java}},
+ Url = {citeseer.ist.psu.edu/ban98design.html},
+ Year = {1998},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/ban98design.html}}
+
+ at inproceedings{Boner2007,
+ Address = {Vancouver, Canada},
+ Author = {Jonas Bon{\'e}r and Eugene Kuleshov},
+ Booktitle = {Proceedings of AOSD.NET 2007},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Title = {{Clustering the Java Virtual Machine using Aspect-Oriented Programming}},
+ Year = {2007}}
+
+ at inproceedings{hallama06,
+ Address = {San Francisco, USA},
+ Author = {N. Hallama and A. Luckow and B. Schnor},
+ Booktitle = {ISCA 19th International Conference on Parallel and Distributed Computing Systems},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:55:00 +0200},
+ Pages = {76--83},
+ Title = {{Grid Security for Fault Tolerant Grid Applications}},
+ Year = {2006}}
+
+ at inproceedings{1135831,
+ Address = {New York, NY, USA},
+ Author = {Jorge Salas and Francisco Perez-Sorrosal and Marta Patino-Martinez and Ricardo Jimenez-Peris},
+ Booktitle = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Doi = {10.1145/1135777.1135831},
+ Isbn = {1-59593-323-9},
+ Location = {Edinburgh, Scotland},
+ Pages = {357--366},
+ Publisher = {ACM Press},
+ Title = {{WS-Replication: A Framework for Highly Available Web Services}},
+ Year = {2006},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/1135777.1135831}}
+
+ at inproceedings{LSS03,
+ Address = {{Colorado Springs, Colorado, USA}},
+ Author = {G. Lanfermann and B. Schnor and E. Seidel},
+ Booktitle = {{Eighth IFIP/IEEE International Symposium on Integrated Network Management (IM 2003)}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:55:45 +0200},
+ Pages = {519--532},
+ Title = {{Grid Object Description: Characterizing Grids}},
+ Year = {2003}}
+
+ at misc{gt4sec,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Von Welch},
+ Howpublished = {\url{http://www-unix.globus.org/toolkit/docs/4.0/security/GT4-GSI-Overview.pdf}},
+ Key = {Welch},
+ Title = {{Globus Toolkit Version 4 Grid Security Infrastructure: A Standards Perspective}},
+ Year = {2005}}
+
+ at misc{grinder,
+ Author = {Paco G{\'o}mez and Philip Aston and others},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://grinder.sourceforge.net/}},
+ Title = {{The Grinder, a Java Load Testing Framework}},
+ Year = {2006}}
+
+ at inproceedings{schnorLuckow06,
+ Address = {Dresden},
+ Author = {A. Luckow and B. Schnor},
+ Booktitle = {Proceedings 2. Workshop: Grid-Technologie f\"ur den Entwurf technischer Systeme},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:57:19 +0200},
+ Pages = {47--54},
+ Title = {{Migol: A Fault Tolerant Service Framework for Grid Computing -- Evolution to WSRF}},
+ Year = {2006}}
+
+ at misc{welchBarton05,
+ Author = {Von Welch and Tom Barton and Kate Keahey and Frank Siebenlist},
+ Booktitle = {4th Annual PKI R\&D Workshop},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://grid.ncsa.uiuc.edu/papers/gridshib-pki05-final.pdf}},
+ Title = {{Attributes, Anonymity, and Access: Shibboleth and Globus Integration to Facilitate Grid Collaboration}},
+ Year = {2005}}
+
+ at misc{XACML,
+ Author = {OASIS},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{ http://docs.oasis-open.org/xacml/2.0/access_control-xacml-2.0-core-spec-os.pdf}},
+ Title = {{eXtensible Access Control Markup Language (XACML) Version 2.0}},
+ Year = {2005}}
+
+ at article{770786,
+ Address = {Amsterdam, The Netherlands, The Netherlands},
+ Author = {David W. Chadwick and Alexander Otenko},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Doi = {10.1016/S0167-739X(02)00153-X},
+ Issn = {0167-739X},
+ Journal = {Future Gener. Comput. Syst.},
+ Number = {2},
+ Pages = {277--289},
+ Publisher = {Elsevier Science Publishers B. V.},
+ Title = {The PERMIS X.509 role based privilege management infrastructure},
+ Volume = {19},
+ Year = {2003},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1016/S0167-739X(02)00153-X}}
+
+ at inproceedings{lang06,
+ Address = {Cambridge},
+ Author = {B. Lang and I. Foster and F. Siebenlist and R. Ananthakrishnan and T. Freeman},
+ Booktitle = {Proceedings Fifth IEEE Symposium on Network Computing and Applications},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:45:03 +0200},
+ Howpublished = {Available at: \url{http://www.globus.org/alliance/publications/papers/IEEE_NCA_AGC.pdf}},
+ Title = {{A Multipolicy Authorization Framework for Grid Security}},
+ Year = {2006}}
+
+ at misc{Chadwick06,
+ Author = {D. W. Chadwick and A. Novikov and O. Otenko},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {Available at: \url{http://www.terena.nl/events/tnc2006/core/getfile.php?file_id=753}},
+ Title = {{GridShib and PERMIS Integration: Adding Policy-driven RBAC to Attribute-based Authorisation in Grids}},
+ Year = {2006}}
+
+ at inproceedings{KK04,
+ Address = {Nicosia, Cyprus},
+ Author = {Jozsef Kovacs and Peter Kacsuk},
+ Booktitle = {Proceedings of the 2nd European Across Grids Conference},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Key = {Grid, migration},
+ Pages = {80--89},
+ Title = {{A Migration Framework for Executing Parallel Programs in the Grid}},
+ Year = {2004}}
+
+ at misc{gtsecurity,
+ Author = {{Globus Alliance}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.globus.org/toolkit/security/}},
+ Title = {{GT Security (GSI)}},
+ Year = {2005}}
+
+ at inbook{TTL03,
+ Author = {Douglas Thain and Todd Tannenbaum and Miron Livny},
+ Chapter = {{Condor and the Grid}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Fran Berman and A.J.G. Hey},
+ Publisher = {John Wiley},
+ Title = {{Grid Computing: Making the Global Infrastructure a Reality}},
+ Year = {2003}}
+
+ at inproceedings{VD03,
+ Author = {Sathish Vadhiyar and Jack Dongarra},
+ Booktitle = {Proceedings of the 3rd IEEE/ACM International Symposium on Cluster Computing and the Grid},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Isbn = {0-7695-1919-9},
+ Pages = {130},
+ Publisher = {IEEE Computer Society},
+ Title = {{A Performance Oriented Migration Framework For The Grid}},
+ Year = {2003}}
+
+ at article{1152086,
+ Address = {Norwell, MA, USA},
+ Author = {F. Berman and H. Casanova and A. Chien and K. Cooper and H. Dail and A. Dasgupta and W. Deng and J. Dongarra and L. Johnsson and K. Kennedy and C. Koelbel and B. Liu and X. Liu and A. Mandal and G. Marin and M. Mazina and J. Mellor-Crummey and C. Mendes and A. Olugbile and M. Patel and D. Reed and Z. Shi and O. Sievert and H. Xia and A. YarKhan},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Doi = {10.1007/s10766-005-3584-4},
+ Issn = {0885-7458},
+ Journal = {Int. J. Parallel Program.},
+ Number = {2},
+ Pages = {209--229},
+ Publisher = {Kluwer Academic Publishers},
+ Title = {{New Grid Scheduling and Rescheduling Methods in the GrADS Project}},
+ Volume = {33},
+ Year = {2005},
+ Bdsk-Url-1 = {http://dx.doi.org/10.1007/s10766-005-3584-4}}
+
+ at article{CLC05,
+ Abstract = {Although Web-based information systems (WISs) have been widely used by enterprises to accomplish business tasks through the Internet, there is little research on designing a flexible access control and delegation model for WISs. In this paper, we design a user-to-user and role-to-role delegation model (called X-RDR model) for WISs. The authorization and delegation policies are encoded in XML and the granularity of control can be as small as a text-field or button. Additionally, the proposed model supports single-step delegation, multi-step delegation, multiple delegation, partial delegation, separation of duties, and cascading revocation. A prototype was also implemented to demonstrate the feasibility of the proposed model.},
+ Author = {S. Chou and E.J.-L. Lu and Y.-H. Chen},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Journal = {Operating Systems Review},
+ Key = {Delegation, RBAC, Webservice},
+ Number = {1},
+ Pages = {4--21},
+ Title = {{X-RDR: A Role-based Delegation Processor for Web-based Information Systems}},
+ Volume = {39},
+ Year = {2005}}
+
+ at inproceedings{ferraiolo92rolebased,
+ Author = {D. Ferraiolo and R. Kuhn},
+ Booktitle = {15th {NIST}-{NCSC} National Computer Security Conference},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Pages = {554--563},
+ Title = {{Role-Based Access Controls}},
+ Url = {citeseer.ist.psu.edu/ferraiolo92rolebased.html},
+ Year = {1992},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/ferraiolo92rolebased.html}}
+
+ at mastersthesis{Luckow05,
+ Author = {Andr\'e Luckow},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ School = {University of Potsdam, Germany},
+ Title = {{Entwurf und Implementation eines Migrationsdienstes f\"ur Grid-Anwendungen gem\"a\ss der Open Grid Service Architecture (OGSA)}},
+ Year = {2005}}
+
+ at misc{unicore-security,
+ Author = {T. Goss-Walter and R. Letz and T. Kentemich and H.-C. Hoppe and P. Wieder},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.ggf.org/documents/GFD.18.pdf}},
+ Title = {{An Analysis of the UNICORE Security Model}},
+ Year = {2003}}
+
+ at misc{gridportlets,
+ Author = {Michael Russell and Jason Novotny and Oliver Wehrens},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://www.gridsphere.org/gridsphere/html/publications/GridPortlets.pdf}},
+ Title = {{GridSphere's Grid Portlets}},
+ Year = {2005}}
+
+ at misc{unicore,
+ Author = {{UNICORE Forum e.V.}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.unicore.org/documents/UNICOREPlus-Final-Report.pdf}},
+ Title = {{{UNICORE Plus Final Report - Uniform Interface to Computing Resources}}},
+ Year = {2003}}
+
+ at misc{kerberos-prot-trans,
+ Author = {Frederich Chong},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.microsoft.com/technet/prodtechnol/windowsserver2003/technologies/security/constdel.mspx}},
+ Title = {{Kerberos Protocol Transition and Constrained Delegation}},
+ Year = {Visited: 2005}}
+
+ at misc{unigrids,
+ Author = {{UniGrids Project}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.unigrids.org/}},
+ Title = {{Uniform Interface to Grid Services}},
+ Year = {Visited: 10/2005}}
+
+ at misc{unicore-delegation,
+ Author = {David Snelling and Sven van den Berghe and Vivian Qian Li},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.unigrids.org/papers/explicittrust.pdf}},
+ Title = {{Explicit Trust Delegation: Security for Dynamic Grids}},
+ Urldate = {11.06.2005},
+ Year = {2004}}
+
+ at misc{unicore-globus,
+ Author = {Michael Rambadt and Philipp Wieder},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.grid-interoperability.org/unicore_globus_interop_paper_final.pdf }},
+ Title = {{UNICORE -- Globus: Interoperability of Grid Infrastructures}},
+ Year = {2002}}
+
+ at misc{GRIPARCH,
+ Author = {{Grid Interoperability Project}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.grid-interoperability.org/}},
+ Title = {{GRIP-Architektur}},
+ Year = {Visited: 10/2005}}
+
+ at book{WSSEC2,
+ Author = {Jothy Rosenberg and David Remy},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Publisher = {SAMS Publishing},
+ Title = {{Securing Web Services with WS-Security}},
+ Year = {2004}}
+
+ at misc{KERBRFC,
+ Author = {J. Kohl and C. Neuman},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{ftp://ftp.isi.edu/in-notes/rfc1510.txt}},
+ Title = {{RFC 1510: The Kerberos Network Authentication Service (V5)}},
+ Year = {1993}}
+
+ at misc{rfc2904,
+ Author = {J. Vollbrecht and P. Calhoun and S. Farrell and L. Gommans and G. Gross and B. de Bruijn and C. de Laat and M. Holdrege and D. Spence},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{http://www.faqs.org/rfcs/rfc2904.html}},
+ Title = {{RFC 2904: AAA Authorization Framework}},
+ Year = {2000}}
+
+ at misc{Welch2005,
+ Author = {Von Welch and Rachana Ananthakrishnan and Frank Siebenlist and David Chadwick and Sam Meder and Laura Pearlman},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {URL: \url{https://forge.gridforum.org/projects/ogsa-authz/document/draft-ogsi-authz-saml-aug15-05.pdf/en/1}},
+ Title = {{Use of SAML for OGSA Authorization}},
+ Year = {2005}}
+
+ at misc{x509,
+ Author = {R. Housley and W. Ford and W. Polk and D. Solo},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://www.ietf.org/rfc/rfc3280.txt}},
+ Title = {{Internet X.509 Public Key Infrastructure Certificate and Certificate Revocation List (CRL) Profile}},
+ Year = {2002}}
+
+ at misc{cas,
+ Author = {{Globus Alliance}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://www-unix.globus.org/toolkit/docs/4.0/security/cas/}},
+ Title = {{Community Authorization Service (CAS): Key Concepts}},
+ Year = {Visited: 05/2006}}
+
+ at misc{cas-paper,
+ Author = {Von Welch and Rachana Ananthakrishnan and Sam Meder and Laura Pearlman and Frank Siebenlist},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://xml.coverpages.org/WelchSAML20030819.pdf}},
+ Title = {{Use of SAML in the Community Authorization Service}},
+ Year = {2003}}
+
+ at misc{oasis-wssc,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Editor = {Martin Gudgin and Anthony Nadalin},
+ Howpublished = {\url{ftp://www6.software.ibm.com/software/developer/library/ws-secureconversation.pdf}},
+ Key = {Gudgin},
+ Title = {{Web Services Secure Conversation Language (WS-SecureConversation)}},
+ Year = {2005}}
+
+ at misc{cumulvs,
+ Author = {James Arthur Kohl and Philip M. Papadopoulos},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://www.netlib.org/cumulvs/}},
+ Key = {cumulvs},
+ Title = {{CUMULVS Version 1.0}},
+ Year = {1996}}
+
+ at misc{rls-paper,
+ Author = {Ann L. Chervenak and Naveen Palavalli and Shishir Bharathi and Carl Kesselman and Robert Schwartzkopf},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://www.globus.org/alliance/publications/papers/chervenakhpdc13.pdf}},
+ Title = {{Performance and Scalability of a Replica Location Service}},
+ Year = {2004}}
+
+ at inproceedings{762815,
+ Address = {Los Alamitos, CA, USA},
+ Author = {G. Bosilca and A. Bouteiller and F. Cappello and S. Djilali and G. Fedak and C. Germain and T. Herault and P. Lemarinier and O. Lodygensky and F. Magniette and V. Neri and A. Selikhov},
+ Booktitle = {Supercomputing '02: Proceedings of the 2002 ACM/IEEE conference on Supercomputing},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-27 23:39:50 +0200},
+ Location = {Baltimore, Maryland},
+ Pages = {1--18},
+ Publisher = {IEEE Computer Society Press},
+ Title = {{MPICH-V: Toward a Scalable Fault Tolerant MPI for Volatile Nodes}},
+ Year = {2002}}
+
+ at misc{gt4-foster,
+ Author = {L. Liming and I. Foster},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:40:48 +0200},
+ Howpublished = {\url{http://www.grids-center.org/news/clusterworld/0505GridFinal.pdf}},
+ Title = {GT4: What's in it for you?},
+ Year = {2005}}
+
+ at misc{andrew-towards,
+ Author = {A. Nguyen-Tuong and A. Grimshaw and G. Wasson and M. Humphrey and J. Knight},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-27 23:47:53 +0200},
+ Howpublished = {{ \url{http://www.cs.virginia.edu/~techrep/CS-2004-11.pdf}}},
+ Title = {{Towards Dependable Grids}}}
+
+ at misc{myproxy,
+ Author = {Jim Basney and Marty Humphrey and Von Welch},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {\url{http://www.ncsa.uiuc.edu/~jbasney/myproxy-spe.pdf}},
+ Title = {{The MyProxy Online Credential Repository}},
+ Year = {2005}}
+
+ at inproceedings{welchfoster04,
+ Author = {V. Welch and I. Foster and C. Kesselman and O. Mulmo and L. Pearlman and S. Tuecke and J. Gawor and S. Meder and F. Siebenlist},
+ Booktitle = {{3rd Annual PKI R\&D Workshop}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Title = {{X.509 Proxy Certificates for Dynamic Delegation}},
+ Year = {2004}}
+
+ at inproceedings{stellner96cocheck,
+ Address = {Honolulu, Hawaii},
+ Author = {Georg Stellner},
+ Booktitle = {{Proceedings of the 10th International Parallel Processing Symposium ({IPPS} '96)}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Title = {{CoCheck: Checkpointing and Process Migration for {MPI}}},
+ Url = {citeseer.ist.psu.edu/stellner96cocheck.html},
+ Year = {1996},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/stellner96cocheck.html}}
+
+ at inproceedings{823236,
+ Address = {Washington, DC, USA},
+ Author = {Adnan Agbaria and Roy Friedman},
+ Booktitle = {HPDC '99: Proceedings of the Eighth IEEE International Symposium on High Performance Distributed Computing},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Isbn = {0-7695-0287-3},
+ Pages = {31},
+ Publisher = {IEEE Computer Society},
+ Title = {{Starfish: Fault-Tolerant Dynamic MPI Programs on Clusters of Workstations}},
+ Year = {1999}}
+
+ at inproceedings{WNMKB05,
+ Abstract = {Divide-and-conquer is a well-suited programming paradigm for
+parallel Grid applications. Our Satin system efficiently schedules the
+fine-grained tasks of a divide-and-conquer application across multiple clusters
+in a grid. To accommodate long-running applications, we present a fault-tolerance
+mechanism for Satin that has negligible overhead during normal execution, while
+minimizing the amount of redundant work done after a crash of one or more nodes.
+We study the impact of our fault-tolerance mechanism on application efficiency,
+both on the Dutch DAS-2 system and using the European testbed of the EC-funded
+project GridLab.},
+ Address = {Denver, Colorado, USA},
+ Author = {Gosia Wrzesinska and Rob V. van Nieuwpoort and Jason Maassen and Thilo Kielmann and Henri E. Bal},
+ Booktitle = {Proceedings of the 19th IEEE International Parallel and Distributed Processing Symposium (IPDPS'05)},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Key = {Grid, Fehlertoleranz},
+ Title = {{Fault-tolerant Scheduling of Fine-grained Tasks in Grid Environments}},
+ Year = {2005}}
+
+ at inproceedings{Stellner95,
+ Address = {Lyon},
+ Author = {Georg Stellner and Jim Pruyne},
+ Booktitle = {Proceedings of the Second European {PVM} User Group Meeting},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Title = {{Resource Management and Checkpointing for {PVM}}},
+ Url = {http://wwwbode.informatik.tu-muenchen.de/~stellner/EPVMUG95.ps},
+ Year = {1995},
+ Bdsk-Url-1 = {http://wwwbode.informatik.tu-muenchen.de/~stellner/EPVMUG95.ps}}
+
+ at phdthesis{Lanfermann2003,
+ Author = {G. Lanfermann},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 23:34:12 +0200},
+ School = {University of Potsdam, Germany},
+ Title = {{Nomadic Migration~-- A Service Environment for Autonomic Computing on the Grid}},
+ Year = {2003}}
+
+ at mastersthesis{Stasch05,
+ Author = {Kay Stasch},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ School = {University of Potsdam, Germany},
+ Title = {{Architektur fehlertoleranter Web Services}},
+ Year = {2005}}
+
+ at book{Cohen,
+ Address = {Upper Saddle River, New Jersey, USA},
+ Author = {Frank Cohen},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Publisher = {Pearson Education Inc.},
+ Shorttitle = {Java Testing and Design},
+ Title = {{Java Testing and Design -- From Unit Testing to automated Web Testing}},
+ Year = {2004}}
+
+ at misc{Selic,
+ Author = {Brian Selic},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:13 +0100},
+ Howpublished = {{ \url{http://www-128.ibm.com/developerworks/rational/library/114.html}}},
+ Journal = {IBM developerWorks},
+ Title = {{Fault tolerance techniques for distributed systems}},
+ Year = {2004}}
+
+ at inproceedings{condor-hunter,
+ Author = {M. Litzkow and M. Livny and M. Mutka},
+ Booktitle = {Proceedings of the 8th International Conference of Distributed Computing Systems},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-28 11:50:37 +0200},
+ Title = {{{C}ondor - A Hunter of Idle Workstations}},
+ Year = {1988}}
+
+ at misc{Fricke02,
+ Author = {C. Fricke},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {Student's Thesis, University of Potsdam},
+ Key = {Fricke},
+ Title = {{{C}haracterizing Networks through the {G}rid {O}bject {D}escription {L}anguage}},
+ Year = {2003}}
+
+ at misc{Humphrey,
+ Author = {Marty Humphrey and Mary Thompson},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.ggf.org/documents/GFD.12.pdf}},
+ Key = {Humphrey},
+ Title = {{Security Implications of Typical Grid Computing Usage Scenarios}},
+ Year = {2000}}
+
+ at misc{FOSTER01,
+ Author = {I. Foster and C. Kesselman and S. Tuecke},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:42:52 +0200},
+ Howpublished = {{\url{http://www.globus.org/research/papers/anatomy.pdf}}},
+ Title = {{The Anatomy of the Grid}},
+ Year = {2001}}
+
+ at misc{FOSTER02,
+ Author = {I. Foster},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:41:01 +0200},
+ Howpublished = {{ \url{www-fp.mcs.anl.gov/~foster/Articles/WhatIsTheGrid.pdf}}},
+ Title = {{What is the Grid}},
+ Year = {2002}}
+
+ at misc{OGSA,
+ Author = {I. Foster and C. Kesselman and J. Nick and S. Tuecke},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-25 13:40:00 +0200},
+ Howpublished = {\url{http://www-unix.globus.org/toolkit/3.0/ogsa/docs/physiology.pdf}},
+ Shorttitle = {{The Physiology of the Grid}},
+ Title = {{The Physiology of the Grid}},
+ Year = {2002}}
+
+ at article{diffie76new,
+ Author = {Whitfield Diffie and Martin E. Hellman},
+ Date = {November 1976},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Journal = {IEEE Transactions on Information Theory},
+ Number = {6},
+ Pages = {644--654},
+ Title = {{New Directions in Cryptography}},
+ Url = {citeseer.ist.psu.edu/diffie76new.html},
+ Volume = {IT-22},
+ Year = {1976},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/diffie76new.html}}
+
+ at inproceedings{Kornievskaia:2001:KCT,
+ Acknowledgement = ack-nhfb,
+ Author = {Olga Kornievskaia and Peter Honeyman and Bill Doster and Kevin Coffman},
+ Bibdate = {Tue Oct 15 16:52:27 2002},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Title = {{{Kerberized} Credential Translation: {A} Solution to {Web} Access Control}},
+ Url = {http://www.usenix.org/publications/library/proceedings/sec01/kornievskaia.html},
+ Year = {2001},
+ Bdsk-Url-1 = {http://www.usenix.org/publications/library/proceedings/sec01/kornievskaia.html}}
+
+ at inproceedings{582055,
+ Address = {New York, NY, USA},
+ Author = {Patrick C. Moore and Wilbur R. Johnson and Richard J. Detry},
+ Booktitle = {Supercomputing '01: Proceedings of the 2001 ACM/IEEE conference on Supercomputing (CDROM)},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Doi = {10.1145/582034.582055},
+ Isbn = {1-58113-293-X},
+ Location = {Denver, Colorado},
+ Pages = {21--21},
+ Publisher = {ACM Press},
+ Title = {{Adapting Globus and Kerberos for a Secure ASCI Grid}},
+ Year = {2001},
+ Bdsk-Url-1 = {http://doi.acm.org/10.1145/582034.582055}}
+
+ at inproceedings{LS05,
+ Author = {A. Luckow and B. Schnor},
+ Booktitle = {15th European PVM/MPI User's Group Meeting - Sorrento, Italy},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:55:29 +0200},
+ Pages = {258--267},
+ Title = {{{Migol: A Fault Tolerant Service Framework for MPI Applications in the Grid}}},
+ Year = {2005}}
+
+ at misc{cws,
+ Author = {{Globus Alliance}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {Available at: \url{http://www.globus.org/toolkit/docs/4.0/common/cwscore/C-GT4-WS-Design.pdf}},
+ Title = {{GT4.0 C WS Design -- Design of Web Services in C for the Globus Toolkit}},
+ Year = {2005}}
+
+ at article{gropp,
+ Author = {William Gropp and Ewing Lusk},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Journal = {High Performance Computing and Applications},
+ Title = {{Fault Tolerance in MPI Programs}},
+ Year = {2002}}
+
+ at misc{oasis-wstrust,
+ Author = {Steve Anderson and Jeff Bohren and Toufic Boubez and Marc Chanliau and Giovanni Della-Libera and Brendan Dixon and Praerit Garg and Phillip Hallam-Baker and Maryann Hondo and Chris Kaler and Hal Lockhart and Robin Martherus and Hiroshi Maruyama and Nataraj Nagaratnam and Andrew Nash and Rob Philpott and Darren Platt and Hemma Prafullchandra and Maneesh Sahu and John Shewchuk and Dan Simon and Davanum Srinivas and Elliot Waingold and David Waite and Doug Walter and Riaz Zolfonoon},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{ftp://www6.software.ibm.com/software/developer/library/ws-trust.pdf}},
+ Title = {{Web Service Trust Language (WS-Trust)}},
+ Year = {2005}}
+
+ at misc{tls,
+ Author = {T. Dierks and C. Allen},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.ietf.org/rfc/rfc2246.txt}},
+ Title = {{The TLS Protocol Version 1.0}},
+ Year = {1999}}
+
+ at misc{proxycert,
+ Author = {S. Tuecke and D. Engert and I. Foster and V. Welch and M. Thompson and L. Pearlman and C. Kesselman},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.ggf.org/security/gsi/draft-ggf-gsi-proxy-04.pdf}},
+ Note = {Accessed 05/2005},
+ Title = {{{Internet X.509 Public Key Infrastructure Proxy Certificate Profile}}}}
+
+ at misc{proxycert-rfc,
+ Author = {S. Tuecke and V. Welch and D. Engert and L. Pearlman and M. Thompson},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.ietf.org/rfc/rfc3820.txt}},
+ Title = {{{RFC 3820: Internet X.509 Public Key Infrastructure (PKI) Proxy Certificate Profile}}},
+ Year = {2004}}
+
+ at misc{attrcert-rfc,
+ Author = {S. Farrell and R. Housley},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.ietf.org/rfc/rfc3281.txt}},
+ Title = {{{RFC 3281: An Internet Attribute Certificate Profile for Authorization}}},
+ Year = {2002}}
+
+ at article{Chen,
+ Author = {Chen, D. and others},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Journal = {Nucl. Instrum. Meth.},
+ Pages = {80--84},
+ Title = {{OGSA Globus Toolkit 3 Evaluation Activity at CERN}},
+ Url = {http://www2.twgrid.org/CERN_news/osgc-2.pdf},
+ Volume = {A534},
+ Year = {2004}}
+
+ at inproceedings{DBLP:conf/eagc/AlfieriCCdFGLS03,
+ Author = {R. Alfieri and R. Cecchini and V. Ciaschini and L. dell'Agnello and {\'A}. Frohner and A. Gianoli and K. L{\"o}rentey and F. Spataro},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {European Across Grids Conference},
+ Crossref = {DBLP:conf/eagc/2003},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-19 09:15:16 +0200},
+ Ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=2970{\&}spage=33},
+ Pages = {33--40},
+ Title = {{VOMS, an Authorization System for Virtual Organizations}},
+ Year = {2003}}
+
+ at proceedings{DBLP:conf/eagc/2003,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {European Across Grids Conference},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Editor = {F. Fern{\'a}ndez Rivera and Marian Bubak and A. G{\'o}mez Tato and Ramon Doallo},
+ Isbn = {3-540-21048-2},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Grid Computing, First European Across Grids Conference, Santiago de Compostela, Spain, February 13-14, 2003, Revised Papers},
+ Volume = {2970},
+ Year = {2004}}
+
+ at misc{ogsi,
+ Author = {S. Tuecke and I. Foster and C. Kesselman},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:41:15 +0200},
+ Howpublished = {\url{http://www-unix.globus.org/toolkit/draft-ggf-ogsi-gridservice-33_2003-06-27.pdf}},
+ Title = {{Open Grid Service Infrastructure}},
+ Year = {2003}}
+
+ at misc{globus-wsrf,
+ Author = {Globus},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.globus.org/wsrf/faq.asp#wsrf10}},
+ Title = {{Globus WSRF FAQ}},
+ Year = {2004}}
+
+ at misc{wsrf-spec,
+ Author = {K. Czajkowski and D. Ferguson and I. Foster and J. Frey and S. Graham and I. Sedukhin and D. Snelling and S. Tuecke and W. Vambenepe},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:41:47 +0200},
+ Howpublished = {\url{http://www.oasis-open.org/committees/download.php/6796/ws-wsrf.pdf}},
+ Title = {{The WS-Resource Framework}},
+ Year = {2005}}
+
+ at misc{wsrf-res,
+ Author = {I. Foster and J. Frey and S. Graham and S. Tuecke and K. Czajkowski and D. Ferguson and F. Leymann and M. Nally and I. Sedukhin and D. Snelling and T. Vambenepe and S. Weerawarana},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:42:37 +0200},
+ Howpublished = {\url{http://www-106.ibm.com/developerworks/library/ws-resource/ws-modelingresources.html}},
+ Title = {{Modeling Stateful Resources with Web Services}},
+ Year = {2004}}
+
+ at misc{ws-addr,
+ Author = {W3C},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.w3.org/Submission/ws-addressing/}},
+ Title = {{Web Services Addressing (WS-Addressing)}},
+ Year = {2004}}
+
+ at inproceedings{globus,
+ Author = {I. Foster},
+ Booktitle = {{Proceedings of IFIP International Conference on Network and Parallel Computing}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-25 19:02:14 +0200},
+ Pages = {2--13},
+ Publisher = {Springer-Verlag},
+ Series = {Lecture Notes in Computer Science},
+ Title = {{Globus Toolkit Version 4: Software for Service-Oriented Systems}},
+ Url = {http://www.globus.org/alliance/publications/papers/IFIP-2006.pdf},
+ Volume = {3779},
+ Year = {2006}}
+
+ at misc{top500,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-01-04 14:35:20 +0100},
+ Howpublished = {\url{http://www.top500.org/lists/}},
+ Key = {top},
+ Title = {{{Top 500 Supercomputer Sites}}},
+ Year = {2007}}
+
+ at misc{cactus,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.cactuscode.org/}},
+ Key = {cactus},
+ Title = {{{CactusCode Homepage}}},
+ Year = {2005}}
+
+ at misc{nws,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://nws.cs.ucsb.edu}},
+ Key = {NWS, Monitoring},
+ Title = {{NWS Homepage}},
+ Year = {2005}}
+
+ at misc{ogce,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.collab-ogce.org/}},
+ Key = {OGCE},
+ Title = {{Open Grid Computing Environment Homepage}},
+ Year = {2005}}
+
+ at misc{jsr168,
+ Author = {Alejandro Abdelnur and Stefan Hepper},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://jcp.org/aboutJava/communityprocess/final/jsr168/index.html}},
+ Title = {{Java Portlet Specification}},
+ Year = {2003}}
+
+ at book{fosterTheGrid2,
+ Address = {San Francisco, California, USA},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:47:22 +0200},
+ Edition = {Second},
+ Editor = {I. Foster and C. Kesselman},
+ Publisher = {Morgan Kaufmann Publishers},
+ Shorttitle = {The Grid 2},
+ Title = {{The Grid: Blueprint for a New Computing Infrastructure}},
+ Year = {2004}}
+
+ at book{schnor99,
+ Address = {Aachen},
+ Author = {B. Schnor},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:57:08 +0200},
+ Publisher = {Shaker Verlag},
+ Title = {{Scheduling and Migration Strategies for Parallel Applications on Distributed Systems}},
+ Year = {1999}}
+
+ at inproceedings{SPOL96,
+ Address = {Dijon},
+ Author = {B. Schnor and S. Petri and R. Oleyniczak and H. Langend{\"o}rfer},
+ Booktitle = {{Proceedings of the ISCA 9th International Conference on Parallel and Distributed Computing Systems}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:57:56 +0200},
+ Editor = {Koukou Yetongnon and Salim Hariri},
+ Isbn = {1-880843-17-X},
+ Optorganization = {{ISCA}},
+ Pages = {330--337},
+ Publisher = {ISCA},
+ Title = {{Scheduling of Parallel Applications on Heterogeneous Workstation Clusters}},
+ Volume = {1},
+ Year = {1996}}
+
+ at article{Pet95,
+ Author = {Stefan Petri and Horst Langend{\"o}rfer},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Journal = {Operating Systems Review},
+ Number = {4},
+ Pages = {25--36},
+ Title = {{Load Balancing and Fault Tolerance in Workstation Clusters~-- Migrating Groups of Communicating Processes}},
+ Volume = {29},
+ Year = {1995}}
+
+ at inproceedings{BBGL96,
+ Address = {Herzliya},
+ Author = {Amnon Barak and Avner Braverman and Ilia Gilderman and O. Laaden},
+ Booktitle = {{Proceedings of the 7th Israeli Conference on Computer Systems and Software Engineering}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Pages = {38--45},
+ Title = {{Performance of PVM with the MOSIX Preemptive Process Migration}},
+ Year = {1996}}
+
+ at incollection{Berman,
+ Address = {San Francisco, California, USA},
+ Author = {F. Berman and T. Hey},
+ Booktitle = {The Grid: Blueprint for a New Computing Infrastructure},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:47:12 +0200},
+ Edition = {Second},
+ Editor = {I. Foster and C. Kesselman},
+ Publisher = {Morgan Kaufmann Publishers},
+ Title = {{The Scientific Imperative}},
+ Year = {2004}}
+
+ at misc{gridsphere,
+ Author = {Jason Novotny and Michael Russell and Oliver Wehrens},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.gridsphere.org/gridsphere/wp-4/Documents/France/gridsphere.pdf}},
+ Title = {{GridSphere: An Advanced Portal Framework}},
+ Year = {2004}}
+
+ at misc{gridsphereWeb,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.gridsphere.org/}},
+ Key = {gridsphere},
+ Title = {{GridSphere Homepage}},
+ Year = {2005}}
+
+ at misc{novotny,
+ Author = {Jason Novotny},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www-106.ibm.com/developerworks/grid/library/gr-portlets/}},
+ Title = {{Developing grid portlets using the GridSphere portal framework}},
+ Year = {2004}}
+
+ at unpublished{luckow04,
+ Author = {Andre Luckow and Nicole Hallama},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {Student's Thesis, University of Potsdam},
+ Note = {Student's Thesis, University of Potsdam},
+ Title = {{Grid Services auf Basis von C/C++}},
+ Year = {2004}}
+
+ at inproceedings{DBLP:conf/europar/FlorosC04,
+ Author = {E. Floros and Yannis Cotronis},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Crossref = {DBLP:conf/europar/2004},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3149{\&}spage=436},
+ Pages = {436--443},
+ Title = {{Exposing MPI Applications as Grid Services}},
+ Year = {2004}}
+
+ at proceedings{DBLP:conf/europar/2004,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Editor = {Marco Danelutto and Marco Vanneschi and Domenico Laforenza},
+ Isbn = {3-540-22924-8},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {{Euro-Par 2004 Parallel Processing, 10th International Euro-Par Conference, Pisa, Italy, August 31-September 3, 2004, Proceedings}},
+ Volume = {3149},
+ Year = {2004}}
+
+ at inproceedings{DBLP:conf/pvm/PuppinTL04,
+ Author = {Diego Puppin and Nicola Tonellotto and Domenico Laforenza},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {PVM/MPI},
+ Crossref = {DBLP:conf/pvm/2004},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3241{\&}spage=207},
+ Pages = {207--214},
+ Title = {{Using Web Services to Run Distributed Numerical Applications}},
+ Year = {2004}}
+
+ at proceedings{DBLP:conf/pvm/2004,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {PVM/MPI},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Editor = {Dieter Kranzlm{\"u}ller and P{\'e}ter Kacsuk and Jack Dongarra},
+ Isbn = {3-540-23163-3},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {{Recent Advances in Parallel Virtual Machine and Message Passing Interface, 11th European PVM/MPI Users' Group Meeting, Budapest, Hungary, September 19-22, 2004, Proceedings}},
+ Volume = {3241},
+ Year = {2004}}
+
+ at inproceedings{823401,
+ Address = {Washington, DC, USA},
+ Author = {V. Welch and F. Siebenlist and I. Foster and J. Bresnahan and K. Czajkowski and J. Gawor and C. Kesselman and S. Meder and L. Pearlman and S. Tuecke},
+ Booktitle = {HPDC '03: Proceedings of the 12th IEEE International Symposium on High Performance Distributed Computing (HPDC'03)},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:45:42 +0200},
+ Isbn = {0-7695-1965-2},
+ Pages = {48},
+ Publisher = {IEEE Computer Society},
+ Title = {{Security for Grid Services}},
+ Year = {2003}}
+
+ at article{NWS99,
+ Author = {R. Wolski and N. Spring and J. Hayes},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 13:31:20 +0200},
+ Journal = {Future Generation Computer Systems},
+ Key = {NWS, Monitoring},
+ Number = {5--6},
+ Pages = {757--768},
+ Title = {{The Network Weather Service: A Distributed Resource Performance Forecasting Service for Metacomputing}},
+ Volume = {15},
+ Year = {1999}}
+
+ at misc{xml-schema,
+ Author = {{W3C Consortium}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.w3.org/XML/Schema}},
+ Title = {{XML Schema}}}
+
+ at incollection{siebenlist,
+ Address = {San Francisco, California, USA},
+ Author = {F. Siebenlist and N. Nagaratnam and V. Welch and C. Neuman},
+ Booktitle = {The Grid: Blueprint for a New Computing Infrastructure},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:46:57 +0200},
+ Edition = {Second},
+ Editor = {I. Foster and C. Kesselman},
+ Publisher = {Morgan Kaufmann Publishers},
+ Title = {{Security for Virtual Organizations: Federation Trust and Policy Domains}},
+ Year = {2004}}
+
+ at techreport{MS04,
+ Author = {M. Mihahn and B. Schnor},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:56:30 +0200},
+ Institution = {University Potsdam},
+ Title = {{Fault-Tolerant Grid Peer Services}},
+ Year = {2004}}
+
+ at inproceedings{gsoap,
+ Address = {Washington, DC, USA},
+ Author = {van Engelen, Robert and Gallivan, Kyle},
+ Booktitle = {CCGRID '02: Proceedings of the 2nd IEEE/ACM International Symposium on Cluster Computing and the Grid},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Isbn = {0-7695-1582-7},
+ Pages = {128},
+ Publisher = {IEEE Computer Society},
+ Title = {{The gSOAP Toolkit for Web Services and Peer-to-Peer Computing Networks}},
+ Year = {2002}}
+
+ at inproceedings{gsiplugin-itcc,
+ Author = {Aloisio, Giovanni and Cafaro, Massimo and Epicoco, Italo and Lezzi, Daniele},
+ Booktitle = {Proceedings of Information Technology Coding and Computing (ITCC 2005)},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Keywords = {WP7},
+ Pages = {304--309},
+ Publisher = {IEEE Press},
+ Title = {{The GSI plug-in for gSOAP: Enhanced Security, Performance, and Reliability}},
+ Volume = {I},
+ Year = {2005}}
+
+ at inproceedings{gram,
+ Address = {London, UK},
+ Author = {K. Czajkowski and I. Foster and N. Karonis and C. Kesselman and S. Martin and W. Smith and S. Tuecke},
+ Booktitle = {IPPS/SPDP '98: Proceedings of the Workshop on Job Scheduling Strategies for Parallel Processing},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:40:31 +0200},
+ Isbn = {3-540-64825-9},
+ Pages = {62--82},
+ Publisher = {Springer-Verlag},
+ Title = {{A Resource Management Architecture for Metacomputing Systems}},
+ Year = {1998}}
+
+ at misc{lsf,
+ Author = {S. Zhou},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {Workshop on Cluster Computing},
+ Key = {LSF},
+ Title = {{{LSF}: {L}oad Sharing in Large-scale Heterogeneous Distributed Systems}},
+ Year = {1992}}
+
+ at techreport{pbs,
+ Author = {Robert Henderson and Dave Tweten},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Institution = {NASA Ames Research Center},
+ Title = {{{P}ortable {B}atch {S}ystem: External Reference Specification}},
+ Year = {1996}}
+
+ at article{nws-paper,
+ Author = {R. Wolski},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Journal = {Cluster Computing},
+ Key = {NWS, Monitoring},
+ Number = {1},
+ Pages = {119--132},
+ Title = {Dynamically forecasting network performance using the {N}etwork {W}eather {S}ervice},
+ Volume = {1},
+ Year = {1998}}
+
+ at inproceedings{DBLP:conf/europar/MonteroHL03,
+ Author = {Rub{\'e}n Montero and Eduardo Huedo and Ignacio Mart{\'\i}n Llorente},
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Crossref = {DBLP:conf/europar/2003},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=2790{\&}spage=366},
+ Pages = {366--373},
+ Title = {{Grid Resource Selection for Opportunistic Job Migration}},
+ Year = {2003}}
+
+ at proceedings{DBLP:conf/europar/2003,
+ Bibsource = {DBLP, http://dblp.uni-trier.de},
+ Booktitle = {Euro-Par},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Editor = {Harald Kosch and L{\'a}szl{\'o} B{\"o}sz{\"o}rm{\'e}nyi and Hermann Hellwagner},
+ Isbn = {3-540-40788-X},
+ Publisher = {Springer},
+ Series = {Lecture Notes in Computer Science},
+ Title = {Euro-Par 2003. Parallel Processing, 9th International Euro-Par Conference, Klagenfurt, Austria, August 26-29, 2003. Proceedings},
+ Volume = {2790},
+ Year = {2003}}
+
+ at article{Montero05,
+ Author = {E. Huedo and R. Montero and I. Llorente},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-25 23:56:45 +0200},
+ Journal = {Scalable Computing: Practice and Experience (SCPE)},
+ Number = {3},
+ Title = {{The GridWay Framework for Adaptive Scheduling and Execution on Grids}},
+ Volume = {6},
+ Year = {2005}}
+
+ at book{sanders97,
+ Address = {Berlin},
+ Author = {P. Sanders and T. Worsch},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 11:12:50 +0200},
+ Publisher = {Logos Verlag},
+ Title = {{Parallele Programmierung mit MPI - ein Praktikum -}},
+ Year = {1997}}
+
+ at misc{cellularautomat,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.cs.uni-potsdam.de/bs/cellularautomat/}},
+ Key = {Cellular Automaton},
+ Title = {{The Cellular Automaton}},
+ Year = {2006}}
+
+ at techreport{Alloc,
+ Author = {W. Allcock and R. Madduri},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-19 09:14:00 +0200},
+ Institution = {Argonne National Laboratory},
+ Title = {Lessons learned producing an {OGSI} compliant {Reliable File Transfer Service}},
+ Year = {2003}}
+
+ at misc{gram-ft,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www-unix.globus.org/toolkit/docs/3.2/gram/ws/developer/fault_tolerance.html}},
+ Key = {GRAM FT},
+ Title = {{WS GRAM Fault Tolerance Architecture}},
+ Year = {2003}}
+
+ at misc{sge,
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-16 10:36:07 +0200},
+ Howpublished = {\url{http://gridengine.sunsource.net/}},
+ Key = {SGE},
+ Title = {{Sun Grid Engine}},
+ Year = {2008}}
+
+ at techreport{csf,
+ Author = {C. Smith},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:39:47 +0200},
+ Howpublished = {\url{http://prdownloads.sourceforge.net/gcsf/CSF_architecture.pdf}},
+ Institution = {Platform Computing Inc.},
+ Title = {{Open Source Metascheduling for Virtual Organizations with the Community Scheduler Framework (CSF)}},
+ Year = {2003}}
+
+ at misc{DRMAA,
+ Author = {A. Haas and R. Brobst and N. Geib and H. Rajic and D. Templeton and J. Tollefsrud and P. Tr{\"o}ger},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-22 18:12:23 +0200},
+ Howpublished = {\url{http://forge.gridforum.org/projects/drmaa-wg/}},
+ Title = {{Distributed Resource Management Application API}},
+ Year = {2005}}
+
+ at misc{WSA,
+ Author = {Alain Andrieux and Karl Czajkowski and Asit Dan and Kate Keahey and Heiko Ludwig and Jim Pruyne and John Rofrano and Steve Tuecke and Ming Xu},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://forge.gridforum.org/projects/graap-wg/}},
+ Title = {{Web Services Agreement Specification}},
+ Year = {2005}}
+
+ at misc{csf-eval,
+ Author = {Amit Jain and Protik Mukherjee and Shilpa Arora},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2007-12-08 18:56:14 +0100},
+ Howpublished = {\url{http://www.comp.nus.edu.sg/~wangxb/SMA5505-report1-Amit%20Jain.pdf}},
+ Title = {Meta-Scheduler for Grid Computing},
+ Year = {2005}}
+
+ at inproceedings{foster98security,
+ Author = {I. Foster and C. Kesselman and G. Tsudik and S. Tuecke},
+ Booktitle = {{ACM Conference on Computer and Communications Security}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-06-30 19:46:06 +0200},
+ Pages = {83--92},
+ Title = {{A Security Architecture for Computational Grids}},
+ Url = {citeseer.ist.psu.edu/foster98security.html},
+ Year = {1998},
+ Bdsk-Url-1 = {citeseer.ist.psu.edu/foster98security.html}}
+
+ at inproceedings{SPB98,
+ Abstract = {{Consistent maintenance of distributed data is
+important in application areas like groupware and for runtime support
+for parallel computing. We examine the performance of different
+multicast based methods for maintaining the consistency of distributed
+data depending on the network topology and concurrency.
+
+Our prototype software implements ordered, reliable multicasts on top
+of the unreliable IP broad- or multicast with three different
+methods (Master-Slave, Token Exchange on Demand, Totem Single
+Ring). This paper shows measurement results for the efficiency and
+scalability of the three methods in different topologies.
+
+The measurements confirm earlier analytical results. Totem behaves
+well in large networks with many concurrent senders. The overhead of
+Token on Demand and of the Master-Slave algorithm is almost the same.
+Also we could not find an indication for the often-read opinion that
+the Master-Slave approach scales worse because of the central
+bottleneck.}},
+ Author = {B. Schnor and S. Petri and M. Becker},
+ Booktitle = {{Proceedings of the 24th EUROMICRO Conference, V{\"a}ster{\aa}s, Sweden, August 25-27, 1998}},
+ Date-Added = {2007-12-08 18:56:12 +0100},
+ Date-Modified = {2008-05-24 18:57:44 +0200},
+ Isbn = {0-8186-8646-4},
+ Month = aug,
+ Optaddress = {Los Alamitos, CA},
+ Optnote = {{{\em 1st Euromicro Workshop on Network Computing} at the EuroMicro'98 Conference, August 25-27, 1998, V{\"a}ster{\aa}s, Sweden}},
+ Pages = {969--975},
+ Publisher = {IEEE Computer Society},
+ Title = {{Scalability of Multicast Based Synchronization Methods}},
+ Year = {1998}}
+
+ at comment{BibDesk Static Groups{
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<array>
+ <dict>
+ <key>group name</key>
+ <string>Group</string>
+ <key>keys</key>
+ <string>schneider90implementing,Pet95,blagojevic00,nws-paper,DBLP:journals/tc/Lamport79,reservations,1152086,OGSA,Cohen,cumulvs,DBLP:conf/iwqos/2003,csf-eval,jacobson88congestion,wsrf-spec,KERBRFC,78972,birman05,DBLP:conf/pvm/2004,schnor99,SR07,KK04,siebenlist,DBLP:conf/europar/MonteroHL03,welchfoster04,ferraiolo92rolebased,gram-ft,globus-ars,gridsphereWeb,fosterTheGrid2,jsr168,ogce,DBLP:conf/europar/2004,gridportlets,jackson01core,unicore-security,andrew-towards,gsiplugin-itcc,823372,xml-schema,Traff:2007fk,XACML,249065,hallama06,NWS99,tls,SGLW07,uddi-spec,condor-hunter,CLC05,DBLP:conf/europar/RoblitzR06,Montero05,DBLP:conf/sofsem/2006,schnorLuckow08,WNMKB05,feller07,wsrf-res,Hwang:2003gf,UDDI-replication,proxycert-rfc,SPB98,823236,944148,ws-addr,rfc2904,582055,Berman,DBLP:conf/parco/StreitWWZ05,Humphrey,Cactus_Goodale03a,DBLP:conf/ipps/2006,x509,Fricke02,DBLP:conf/eagc/2003,1115721,Luckow05,sanders97,Chen,welchBarton05,grinder,976113,gt4sec,Cactus_Allen01e,top500,Goscinsky91,Cactus_Talbot01a,Cactus_Allen01a,unigrids,proxycert,DBLP:conf/sofsem/Schiper06,diffie76new,Kornievskaia:2001:KCT,rls-paper,oasis-wstrust,Lee:2002vn,gsoap,gt4-foster,cws,pbs,cas,unicore,770786,ws-agreement06,ogsi,BurchardEtAl-2005-autonomy,lang06,WSA,cas-paper,DBLP:conf/iwqos/BurchardD03,CGIKS07,cactus,ban98design,UDDI-public,attrcert-rfc,nws,VD03,kerberos-prot-trans,UDDI-org,762815,DBLP:conf/europar/2003,TG07,Selic,823401,DBLP:conf/europar/2006,Stellner95,LSS03,gridsphere,GT07,cellularautomat,stellner96cocheck,gt4-deleg,sge,SAGA_Jha07b,Boner2007,DBLP:conf/pvm/PuppinTL04,FOSTER02,Amir95,bbgrid,DBLP:conf/europar/FlorosC04,FOSTER01,BBGL96,Zhang04,SAGA_Goodale06a,DBLP:conf/parco/2005,SPOL96,lsf,RoeblitzRzadca06,1029434,drmaa07,csf,SBBSB07,oasis-wssc,GRIPARCH,Cactus_Talbot00a,DBLP:conf/eagc/AlfieriCCdFGLS03,PTIW07,1135831,foster98security,DRMAA,Stasch05,gram,Alloc,Chandy82,unicore-globus,Chadwick06,schnorLuckow06,LS05,gtsecurity,SNMP07,novotny,WSSEC2,unicore-delegation,birman99bimodal,gropp,MS04,
springel-2005-364,BSL07,globus-wsrf,Welch2005,LSGHGH07,schopf06,TDAN07,myproxy,TTL03,Lanfermann2003,dgrid,globus,luckow04</string>
+ </dict>
+</array>
+</plist>
+}}
+
+ at techreport{ogf_cpr_arch,
+ author = {Nathan Stone and Derek Simmel and Thilo Kielmann and
+ Andre Merzky},
+ title = {{GFD.93 -- An Architecture for Grid Checkpoint and Recovery Services}},
+ year = {2007},
+ institution = {{Open Grid Forum}},
+ type = {{OGF Informational Document}},
+}
+
+ at techreport{ogf_cpr_uc,
+ author = {Rosa Badia and Robert Hood and Thilo Kielmann and
+ Andre Merzky and Christine Morin and Stephen Pickles and
+ Massimo Sgaravatto and Paul Stodghill and Nathan Stone and
+ Heon Yeom},
+ title = {{GFD.92 -- Use-Cases and Requirements for Grid Checkpoint and Recovery}},
+ year = {2006},
+ institution = {{Open Grid Forum}},
+ type = {{OGF Informational Document}},
+}
+
+ at techreport{saga_cpr_draft,
+ author = {Andre Merzky},
+ title = {{SAGA Extension: Checkpoint and Recovery API (CPR)}},
+ year = {2007},
+ institution = {{Open Grid Forum}},
+ type = {{OGF Informational Document, SAGA Core Working Group}},
+ note = {\url{http://forge.ogf.org/short/saga-core-wg/drafts}}
+}
+
+" " , Lyon, France, May 19-22, 2008 (pdf)
+
+
+ at inproceedings{xtreemos_cpr,
+ author = {John Mehnert-Spahn and Michael Sch{\"o}ttner and Thomas Ropars and David Margery
+ and Christine Morin and Julita Corbal{\'a}n and Toni Cortes},
+ booktitle = {CCGRID '08: IEEE International Symposium on Cluster Computing and the Grid
+ (poster)},
+ location = {Lyon, France},
+ title = {{XtreemOS Grid Checkpointing Architecture}},
+ month = may,
+ year = {2008},
+}
+
+ at inproceedings{allpairs,
+ author = {Christopher Moretti and Jared Bulosan and Douglas Thain and Patrick Flynn},
+ title = {{All-Pairs: An Abstraction for Data Intensive Cloud Computing}},
+ booktitle = {IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
+ month = apr,
+ year = {2008}}
+
+ at misc{saga-core,
+ author = {Goodale, T. and others},
+ title = {{A Simple API for Grid Applications (SAGA)}},
+ note = {http://www.ogf.org/documents/GFD.90.pdf}
+}
+
+ at misc{novelsubmissionmode,
+ author = {P. Chakraborty and S. Jha and D. Katz},
+ title = {{Novel Submission Modes of Tightly-coupled Jobs Across
+ Distributed Resources}},
+ note = {Accepted for Publication in Phil. Trans. of the Royal Society A}
+}
+
+ at misc{saga_condor_url, note = {{http://fortytwo.cct.lsu.edu:8000/SAGA/wiki/CondorAdaptor}}}
+
+ at misc{saga_condor,
+ author = {Shantenu Jha},
+ title = {{Being Optimally Lazy: The case for integrating SAGA
+ with Condor}},
+ howpublished = {Talk given at Condor Week 2008},
+ note = {\url{http://www.cs.wisc.edu/condor/CondorWeek2008/condor_presentations/jha_saga.pdf}},
+ year = {2008},
+}
+
+ at misc{saga_core_long,
+ author = {Tom Goodale and Shantenu Jha and Hartmut Kaiser and Thilo
+ Kielmann and Pascal Kleijer and Andre Merzky and John
+ Shalf and Christopher Smith},
+ title = {{A Simple API for Grid Applications (SAGA)}},
+ url = {http://www.ogf.org/documents/GFD.90.pdf},
+}
+
+
+ at inproceedings{buyya_hpcc,
+ author = {Buyya, R. and others},
+ title = {{Market-Oriented Cloud Computing: Vision, Hype, and
+ Reality for Delivering IT Services as Computing
+ Utilities}},
+ booktitle = {Proc. of the 10th IEEE Intl Conf. on HPCC, Sept. 25--27, 2008, China},
+ note = {Keynote Paper},
+ year = {2008},
+ url = {http://www.gridbus.org/papers/hpcc2008_keynote_cloudcomputing.pdf}}
+
+
+ at misc{eucalyptus, note = {Elastic Utility Computing Architecture for Linking Your Programs To Useful Systems (EUCALYPTUS), http://eucalyptus.cs.ucsb.edu/}}
+
+ at misc{nimbus, note = {NIMBUS http://workspace.globus.org/}}
\ No newline at end of file
More information about the saga-devel
mailing list