diff --git a/src/applications/aphlict/management/PhabricatorAphlictManagementWorkflow.php b/src/applications/aphlict/management/PhabricatorAphlictManagementWorkflow.php --- a/src/applications/aphlict/management/PhabricatorAphlictManagementWorkflow.php +++ b/src/applications/aphlict/management/PhabricatorAphlictManagementWorkflow.php @@ -76,6 +76,7 @@ array( 'servers' => 'list', 'logs' => 'optional list', + 'cluster' => 'optional list', 'pidfile' => 'string', )); } catch (Exception $ex) { @@ -193,7 +194,7 @@ 'admin')); } - $logs = $data['logs']; + $logs = idx($data, 'logs', array()); foreach ($logs as $index => $log) { PhutilTypeSpec::checkMap( $log, @@ -219,6 +220,54 @@ } } + $peer_map = array(); + + $cluster = idx($data, 'cluster', array()); + foreach ($cluster as $index => $peer) { + PhutilTypeSpec::checkMap( + $peer, + array( + 'host' => 'string', + 'port' => 'int', + 'protocol' => 'string', + )); + + $host = $peer['host']; + $port = $peer['port']; + $protocol = $peer['protocol']; + + switch ($protocol) { + case 'http': + case 'https': + break; + default: + throw new PhutilArgumentUsageException( + pht( + 'Configuration file specifies cluster peer ("%s", at index '. + '"%s") with an invalid protocol, "%s". Valid protocols are '. + '"%s" or "%s".', + $host, + $index, + $protocol, + 'http', + 'https')); + } + + $peer_key = "{$host}:{$port}"; + if (!isset($peer_map[$peer_key])) { + $peer_map[$peer_key] = $index; + } else { + throw new PhutilArgumentUsageException( + pht( + 'Configuration file specifies cluster peer "%s" more than '. + 'once (at indexes "%s" and "%s"). Each peer must have a '. 
+ 'unique host and port combination.', + $peer_key, + $peer_map[$peer_key], + $index)); + } + } + $this->configData = $data; $this->configPath = $full_path; diff --git a/src/applications/notification/client/PhabricatorNotificationClient.php b/src/applications/notification/client/PhabricatorNotificationClient.php --- a/src/applications/notification/client/PhabricatorNotificationClient.php +++ b/src/applications/notification/client/PhabricatorNotificationClient.php @@ -19,6 +19,9 @@ public static function tryToPostMessage(array $data) { $servers = PhabricatorNotificationServerRef::getEnabledAdminServers(); + + shuffle($servers); + foreach ($servers as $server) { try { $server->postMessage($data); diff --git a/src/docs/user/cluster/cluster.diviner b/src/docs/user/cluster/cluster.diviner --- a/src/docs/user/cluster/cluster.diviner +++ b/src/docs/user/cluster/cluster.diviner @@ -132,6 +132,11 @@ naturally somewhat resistant to data loss: every clone of a repository includes the entire history. +Repositories may become a scalability bottleneck, although this is rare unless +your install has an unusually heavy repository read volume. Slow clones/fetches +may hint at a repository capacity problem. Adding more repository hosts will +provide an approximately linear increase in capacity. + For details, see @{article:Cluster: Repositories}. @@ -146,6 +151,13 @@ at least one host remains alive. Daemons are stateless, so spreading daemons across multiple hosts provides no resistance to data loss. +Daemons can become a bottleneck, particularly if your install sees a large +volume of write traffic to repositories. If the daemon task queue has a +backlog, that hints at a capacity problem. If existing hosts have unused +resources, increase `phd.taskmasters` until they are fully utilized. From +there, adding more daemon hosts will provide an approximately linear increase +in capacity. + For details, see @{article:Cluster: Daemons}. 
@@ -157,11 +169,37 @@ With multiple web hosts, you can transparently survive the loss of any subset of hosts as long as at least one host remains alive. Web hosts are stateless, -so putting multiple hosts in service provides no resistance to data loss. +so putting multiple hosts in service provides no resistance to data loss +because no data is at risk. + +Web hosts can become a bottleneck, particularly if you have a workload that is +heavily focused on reads from the web UI (like a public install with many +anonymous users). Slow responses to web requests may hint at a web capacity +problem. Adding more hosts will provide an approximately linear increase in +capacity. For details, see @{article:Cluster: Web Servers}. +Cluster: Notifications +====================== + +Configuring multiple notification hosts is simple and has no pre-requisites. + +With multiple notification hosts, you can survive the loss of any subset of +hosts as long as at least one host remains alive. Service may be briefly +disrupted directly after the incident which destroys the other hosts. + +Notifications are noncritical, so this normally has little practical impact +on service availability. Notifications are also stateless, so clustering this +service provides no resistance to data loss because no data is at risk. + +Notification delivery normally requires very few resources, so adding more +hosts is unlikely to have much impact on scalability. + +For details, see @{article:Cluster: Notifications}. + + Overlaying Services =================== @@ -172,14 +210,14 @@ In planning a cluster, consider these blended host types: -**Everything**: Run HTTP, SSH, MySQL, repositories and daemons on a single -host. This is the starting point for single-node setups, and usually also the -best configuration when adding the second node. +**Everything**: Run HTTP, SSH, MySQL, notifications, repositories and daemons +on a single host. 
This is the starting point for single-node setups, and +usually also the best configuration when adding the second node. -**Everything Except Databases**: Run HTTP, SSH, repositories and daemons on one -host, and MySQL on a different host. MySQL uses many of the same resources that -other services use. It's also simpler to separate than other services, and -tends to benefit the most from dedicated hardware. +**Everything Except Databases**: Run HTTP, SSH, notifications, repositories and +daemons on one host, and MySQL on a different host. MySQL uses many of the same +resources that other services use. It's also simpler to separate than other +services, and tends to benefit the most from dedicated hardware. **Repositories and Daemons**: Run repositories and daemons on the same host. Repository hosts //must// run daemons, and it normally makes sense to @@ -208,8 +246,8 @@ This section provides some guidance on reasonable ways to scale up a cluster. The smallest possible cluster is **two hosts**. Run everything (web, ssh, -database, repositories, and daemons) on each host. One host will serve as the -master; the other will serve as a replica. +database, notifications, repositories, and daemons) on each host. One host will +serve as the master; the other will serve as a replica. Ideally, you should physically separate these hosts to reduce the chance that a natural disaster or infrastructure disruption could disable or destroy both @@ -230,7 +268,7 @@ onto its own host). After separating databases, separating repository + daemon nodes is likely -the next step. +the next step to consider. To improve **availability**, add another copy of everything you run in one datacenter to a new datacenter. 
For example, if you have a two-node cluster, diff --git a/src/docs/user/cluster/cluster_notifications.diviner b/src/docs/user/cluster/cluster_notifications.diviner new file mode 100644 --- /dev/null +++ b/src/docs/user/cluster/cluster_notifications.diviner @@ -0,0 +1,174 @@ +@title Cluster: Notifications +@group intro + +Configuring Phabricator to use multiple notification servers. + +Overview +======== + +WARNING: This feature is a very early prototype; the features this document +describes are mostly speculative fantasy. + +You can run multiple notification servers. The advantages of doing this +are: + + - you can completely survive the loss of any subset so long as one + remains standing; and + - performance and capacity may improve. + +This configuration is relatively simple, but has a small impact on availability +and does nothing to increase resistance to data loss. + + +Clustering Design Goals +======================= + +Notification clustering aims to restore service automatically after the loss +of some nodes. It does **not** attempt to guarantee that every message is +delivered. + +Notification messages provide timely information about events, but they are +never authoritative and never the only way for users to learn about events. +For example, if a notification about a task update is not delivered, the next +page you load will still show the notification in your notification menu. + +Generally, Phabricator works fine without notifications configured at all, so +clustering assumes that losing some messages during a disruption is acceptable. + + +How Clustering Works +==================== + +Notification clustering is very simple: notification servers relay every +message they receive to a list of peers. + +When you configure clustering, you'll run multiple servers and tell them that +the other servers exist. When any server receives a message, it retransmits it +to all the servers it knows about. 
+ +When a server is lost, clients will automatically reconnect after a brief +delay. They may lose some notifications while their client is reconnecting, +but normally this should only last for a few seconds. + + +Configuring Aphlict +=================== + +To configure clustering on the server side, add a `cluster` key to your +Aphlict configuration file. For more details about configuring Aphlict, +see @{article:Notifications User Guide: Setup and Configuration}. + +The `cluster` key should contain a list of `"admin"` server locations. Every +message the server receives will be retransmitted to all nodes in the list. + +The server is smart enough to avoid sending messages in a cycle, and to avoid +sending messages to itself. You can safely list every server you run in the +configuration file, including the current server. + +You do not need to configure servers in an acyclic graph or only list //other// +servers: just list everything on every server and Aphlict will figure things +out from there. + +A simple example with two servers might look like this: + +```lang=json, name="aphlict.json (Cluster)" +{ + ... + "cluster": [ + { + "host": "notify001.mycompany.com", + "port": 22281, + "protocol": "http" + }, + { + "host": "notify002.mycompany.com", + "port": 22281, + "protocol": "http" + } + ] + ... +} +``` + + +Configuring Phabricator +======================= + +To configure clustering on the client side, add every service you run to +`notification.servers`. Generally, this will be twice as many entries as +you run actual servers, since each server runs a `"client"` service and an +`"admin"` service. 
+ +A simple example with the two servers above (providing four total services) +might look like this: + +```lang=json, name="notification.servers (Cluster)" +[ + { + "type": "client", + "host": "notify001.mycompany.com", + "port": 22280, + "protocol": "https" + }, + { + "type": "client", + "host": "notify002.mycompany.com", + "port": 22280, + "protocol": "https" + }, + { + "type": "admin", + "host": "notify001.mycompany.com", + "port": 22281, + "protocol": "http" + }, + { + "type": "admin", + "host": "notify002.mycompany.com", + "port": 22281, + "protocol": "http" + } +] +``` + +If you put all of the `"client"` servers behind a load balancer, you would +just list the load balancer and let it handle pulling nodes in and out of +service. + +```lang=json, name="notification.servers (Cluster + Load Balancer)" +[ + { + "type": "client", + "host": "notify-lb.mycompany.com", + "port": 22280, + "protocol": "https" + }, + { + "type": "admin", + "host": "notify001.mycompany.com", + "port": 22281, + "protocol": "http" + }, + { + "type": "admin", + "host": "notify002.mycompany.com", + "port": 22281, + "protocol": "http" + } +] +``` + +Notification hosts do not need to run any additional services, although they +are free to do so. The notification server generally consumes few resources +and is resistant to most other loads on the machine, so it's reasonable to +overlay these on top of other services wherever it is convenient. + + +Next Steps +========== + +Continue by: + + - reviewing notification configuration with + @{article:Notifications User Guide: Setup and Configuration}; or + - returning to @{article:Clustering Introduction}. diff --git a/src/docs/user/configuration/notifications.diviner b/src/docs/user/configuration/notifications.diviner --- a/src/docs/user/configuration/notifications.diviner +++ b/src/docs/user/configuration/notifications.diviner @@ -77,6 +77,8 @@ - `servers`: //Required list.// A list of servers to start. 
- `logs`: //Optional list.// A list of logs to write to. + - `cluster`: //Optional list.// A list of cluster peers. This is an advanced + feature. - `pidfile`: //Required string.// Path to a PID file. Each server in the `servers` list should be an object with these keys: @@ -99,10 +101,20 @@ - `path`: //Required string.// Path to the log file. +Each peer in the `cluster` list should be an object with these keys: + + - `host`: //Required string.// The peer host address. + - `port`: //Required int.// The peer port. + - `protocol`: //Required string.// The protocol to connect with, one of + `"http"` or `"https"`. + +Cluster configuration is an advanced topic and can be omitted for most +installs. For more information on how to configure a cluster, see +@{article:Clustering Introduction} and @{article:Cluster: Notifications}. + The defaults are appropriate for simple cases, but you may need to adjust them if you are running a more complex configuration. - Configuring Phabricator ======================= diff --git a/src/view/page/PhabricatorStandardPageView.php b/src/view/page/PhabricatorStandardPageView.php --- a/src/view/page/PhabricatorStandardPageView.php +++ b/src/view/page/PhabricatorStandardPageView.php @@ -539,8 +539,9 @@ if ($servers) { if ($user && $user->isLoggedIn()) { - // TODO: We could be smarter about selecting a server if there are - // multiple options available. + // TODO: We could tell the browser about all the servers and let it + // do random reconnects to improve reliability. 
+ shuffle($servers); $server = head($servers); $client_uri = $server->getWebsocketURI(); diff --git a/support/aphlict/server/aphlict_server.js b/support/aphlict/server/aphlict_server.js --- a/support/aphlict/server/aphlict_server.js +++ b/support/aphlict/server/aphlict_server.js @@ -81,7 +81,8 @@ require('./lib/AphlictAdminServer'); require('./lib/AphlictClientServer'); - +require('./lib/AphlictPeerList'); +require('./lib/AphlictPeer'); var ii; @@ -173,7 +174,26 @@ } } +var peer_list = new JX.AphlictPeerList(); + +debug.log( + 'This server has fingerprint "%s".', + peer_list.getFingerprint()); + +var cluster = config.cluster || []; +for (ii = 0; ii < cluster.length; ii++) { + var peer = cluster[ii]; + + var peer_client = new JX.AphlictPeer() + .setHost(peer.host) + .setPort(peer.port) + .setProtocol(peer.protocol); + + peer_list.addPeer(peer_client); +} + for (ii = 0; ii < aphlict_admins.length; ii++) { var admin_server = aphlict_admins[ii]; admin_server.setClientServers(aphlict_clients); + admin_server.setPeerList(peer_list); } diff --git a/support/aphlict/server/lib/AphlictAdminServer.js b/support/aphlict/server/lib/AphlictAdminServer.js --- a/support/aphlict/server/lib/AphlictAdminServer.js +++ b/support/aphlict/server/lib/AphlictAdminServer.js @@ -22,6 +22,7 @@ properties: { clientServers: null, logger: null, + peerList: null }, members: { @@ -79,8 +80,7 @@ ++self._messagesIn; try { - self._transmit(instance, msg); - response.writeHead(200, {'Content-Type': 'text/plain'}); + self._transmit(instance, msg, response); } catch (err) { self.log( '<%s> Internal Server Error! %s', @@ -139,14 +139,32 @@ /** * Transmits a message to all subscribed listeners. 
*/ - _transmit: function(instance, message) { - var lists = this.getListenerLists(instance); + _transmit: function(instance, message, response) { + var peer_list = this.getPeerList(); - for (var ii = 0; ii < lists.length; ii++) { - var list = lists[ii]; - var listeners = list.getListeners(); - this._transmitToListeners(list, listeners, message); + message = peer_list.addFingerprint(message); + if (message) { + var lists = this.getListenerLists(instance); + + for (var ii = 0; ii < lists.length; ii++) { + var list = lists[ii]; + var listeners = list.getListeners(); + this._transmitToListeners(list, listeners, message); + } + + peer_list.broadcastMessage(instance, message); } + + // Respond to the caller with our fingerprint so it can stop sending + // us traffic we don't need to know about if it's a peer. In particular, + // this stops us from broadcasting messages to ourselves if we appear + // in the cluster list. + var receipt = { + fingerprint: this.getPeerList().getFingerprint() + }; + + response.writeHead(200, {'Content-Type': 'application/json'}); + response.write(JSON.stringify(receipt)); }, _transmitToListeners: function(list, listeners, message) { diff --git a/support/aphlict/server/lib/AphlictPeer.js b/support/aphlict/server/lib/AphlictPeer.js new file mode 100644 --- /dev/null +++ b/support/aphlict/server/lib/AphlictPeer.js @@ -0,0 +1,80 @@ +'use strict'; + +var JX = require('./javelin').JX; + +var http = require('http'); +var https = require('https'); + +JX.install('AphlictPeer', { + + construct: function() { + }, + + properties: { + host: null, + port: null, + protocol: null, + fingerprint: null + }, + + members: { + broadcastMessage: function(instance, message) { + var data; + try { + data = JSON.stringify(message); + } catch (error) { + return; + } + + // TODO: Maybe use "agent" stuff to pool connections? 
+ + var options = { + hostname: this.getHost(), + port: this.getPort(), + method: 'POST', + path: '/?instance=' + instance, + headers: { + 'Content-Type': 'application/json', + 'Content-Length': data.length + } + }; + + var onresponse = JX.bind(this, this._onresponse); + + var request; + if (this.getProtocol() == 'https') { + request = https.request(options, onresponse); + } else { + request = http.request(options, onresponse); + } + + request.write(data); + request.end(); + }, + + _onresponse: function(response) { + var peer = this; + var data = ''; + + response.on('data', function(bytes) { + data += bytes; + }); + + response.on('end', function() { + var message; + try { + message = JSON.parse(data); + } catch (error) { + return; + } + + // If we got a valid receipt, update the fingerprint for this server. + var fingerprint = message.fingerprint; + if (fingerprint) { + peer.setFingerprint(fingerprint); + } + }); + } + } + +}); diff --git a/support/aphlict/server/lib/AphlictPeerList.js b/support/aphlict/server/lib/AphlictPeerList.js new file mode 100644 --- /dev/null +++ b/support/aphlict/server/lib/AphlictPeerList.js @@ -0,0 +1,86 @@ +'use strict'; + +var JX = require('./javelin').JX; + +JX.install('AphlictPeerList', { + + construct: function() { + this._peers = []; + + // Generate a new unique identity for this server. We just use this to + // identify messages we have already seen and figure out which peer is + // actually us, so we don't bounce messages around the cluster forever. + this._fingerprint = this._generateFingerprint(); + }, + + properties: { + }, + + members: { + _peers: null, + _fingerprint: null, + + addPeer: function(peer) { + this._peers.push(peer); + return this; + }, + + addFingerprint: function(message) { + var fingerprint = this.getFingerprint(); + + // Check if we've already touched this message. If we have, we do not + // broadcast it again. If we haven't, we add our fingerprint and then + // broadcast the modified version. 
+ var touched = message.touched || []; + for (var ii = 0; ii < touched.length; ii++) { + if (touched[ii] == fingerprint) { + return null; + } + } + touched.push(fingerprint); + + message.touched = touched; + return message; + }, + + broadcastMessage: function(instance, message) { + var ii; + + var touches = {}; + var touched = message.touched; + for (ii = 0; ii < touched.length; ii++) { + touches[touched[ii]] = true; + } + + var peers = this._peers; + for (ii = 0; ii < peers.length; ii++) { + var peer = peers[ii]; + + // If we know the peer's fingerprint and it has already touched + // this message, don't broadcast it. + var fingerprint = peer.getFingerprint(); + if (fingerprint && touches[fingerprint]) { + continue; + } + + peer.broadcastMessage(instance, message); + } + }, + + getFingerprint: function() { + return this._fingerprint; + }, + + _generateFingerprint: function() { + var src = '23456789abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'; + var len = 16; + var out = []; + for (var ii = 0; ii < len; ii++) { + var idx = Math.floor(Math.random() * src.length); + out.push(src[idx]); + } + return out.join(''); + } + } + +});