modules/core/dbutils/pom.xml | 2
modules/core/dbutils/src/main/scripts/dbsetup/measurement-schema.xml | 4
modules/core/dbutils/src/main/scripts/dbupgrade/db-upgrade.xml | 13
modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java | 32
modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java | 79 +-
modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java | 4
modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerRemote.java | 2
modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageClusterMonitor.java | 9
modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java | 356 ++++++----
modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java | 19
modules/plugins/rhq-storage/src/main/java/org/rhq/plugins/storage/StorageNodeComponent.java | 4
modules/plugins/rhq-storage/src/main/resources/META-INF/rhq-plugin.xml | 5
12 files changed, 369 insertions(+), 160 deletions(-)
New commits:
commit 6a41f788ada57056207fa5ee36259c9f1f2fb42a
Author: John Sanda <jsanda(a)redhat.com>
Date: Wed Aug 14 23:32:41 2013 -0400
bump up dbupgrade version
diff --git a/modules/core/dbutils/pom.xml b/modules/core/dbutils/pom.xml
index 2e8e4b8..1c66dd6 100644
--- a/modules/core/dbutils/pom.xml
+++ b/modules/core/dbutils/pom.xml
@@ -17,7 +17,7 @@
<description>Database schema setup, upgrade and other utilities</description>
<properties>
- <db.schema.version>2.136</db.schema.version>
+ <db.schema.version>2.137</db.schema.version>
<rhq.ds.type-mapping>${rhq.test.ds.type-mapping}</rhq.ds.type-mapping>
<rhq.ds.server-name>${rhq.test.ds.server-name}</rhq.ds.server-name>
<rhq.ds.db-name>${rhq.test.ds.db-name}</rhq.ds.db-name>
commit 9a9767577237ee0efc6cd902a3fb8861f280d193
Author: John Sanda <jsanda(a)redhat.com>
Date: Wed Aug 14 23:02:34 2013 -0400
adding more error handling for storage node deployments
In my previous commit I added code to persist resource operation failures that
occur during storage node deployment. This commit adds error handling for
unexpected server side errors. Errors are logged to the StorageNode entity in
a separate transaction to ensure that the error message gets persisted.
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeDeploymentException.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeDeploymentException.java
deleted file mode 100644
index fca6e96..0000000
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeDeploymentException.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package org.rhq.enterprise.server.storage;
-
-/**
- * @author John Sanda
- */
-public class StorageNodeDeploymentException extends RuntimeException {
-
- public StorageNodeDeploymentException() {
- }
-
- public StorageNodeDeploymentException(String message) {
- super(message);
- }
-
- public StorageNodeDeploymentException(String message, Throwable cause) {
- super(message, cause);
- }
-
- public StorageNodeDeploymentException(Throwable cause) {
- super(cause);
- }
-}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
index aaa54f5..72f5e9d 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
@@ -12,7 +12,6 @@ import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
-import javax.persistence.PersistenceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -23,13 +22,13 @@ import org.rhq.core.domain.common.JobTrigger;
import org.rhq.core.domain.configuration.Configuration;
import org.rhq.core.domain.configuration.PropertyList;
import org.rhq.core.domain.configuration.PropertySimple;
-import org.rhq.core.domain.operation.OperationDefinition;
import org.rhq.core.domain.operation.OperationHistory;
import org.rhq.core.domain.operation.ResourceOperationHistory;
import org.rhq.core.domain.operation.bean.ResourceOperationSchedule;
import org.rhq.core.domain.resource.Resource;
import org.rhq.core.domain.resource.ResourceType;
import org.rhq.core.util.StringUtil;
+import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.enterprise.server.RHQConstants;
import org.rhq.enterprise.server.auth.SessionManager;
import org.rhq.enterprise.server.auth.SubjectManagerLocal;
@@ -71,8 +70,10 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
@EJB
private StorageClientManagerBean storageClientManager;
+ @EJB
+ private StorageNodeOperationsHandlerLocal storageNodeOperationsHandler;
+
@Override
- @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public void announceStorageNode(Subject subject, StorageNode storageNode) {
if (log.isInfoEnabled()) {
log.info("Announcing " + storageNode + " to storage node cluster.");
@@ -110,6 +111,18 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
}
@Override
+ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+ public void logError(StorageNode.OperationMode newStorageNodeOperationMode, String error, Exception e) {
+ try {
+ StorageNode newStorageNode = findNewStorgeNode(newStorageNodeOperationMode);
+ newStorageNode.setErrorMessage(error + " Check the server log for details. Root cause: " +
+ ThrowableUtil.getRootCause(e).getMessage());
+ } catch (Exception e1) {
+ log.error("Failed to log error against storage node", e);
+ }
+ }
+
+ @Override
public void performAddNodeMaintenanceIfNecessary(InetAddress storageNodeAddress) {
StorageNode storageNode = entityManager.createNamedQuery(StorageNode.QUERY_FIND_BY_ADDRESS,
StorageNode.class).setParameter("address", storageNodeAddress.getHostAddress()).getSingleResult();
@@ -167,24 +180,77 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
return;
}
- ResourceOperationHistory resourceOperationHistory = entityManager.find(ResourceOperationHistory.class,
- operationHistory.getId());
- if (resourceOperationHistory == null) {
+ ResourceOperationHistory resourceOperationHistory = (ResourceOperationHistory) operationHistory;
+ if (!isStorageNodeOperation(resourceOperationHistory)) {
return;
}
- if (isStorageNodeOperation(resourceOperationHistory.getOperationDefinition())) {
- if (resourceOperationHistory.getOperationDefinition().getName().equals("updateKnownNodes")) {
- handleUpdateKnownNodes(resourceOperationHistory);
- } else if (operationHistory.getOperationDefinition().getName().equals("prepareForBootstrap")) {
- handlePrepareForBootstrap(resourceOperationHistory);
- } else if (operationHistory.getOperationDefinition().getName().equals("addNodeMaintenance")) {
- handleAddNodeMaintenance(resourceOperationHistory);
+ if (resourceOperationHistory.getOperationDefinition().getName().equals("updateKnownNodes")) {
+ try {
+ storageNodeOperationsHandler.handleUpdateKnownNodes(resourceOperationHistory);
+ } catch (Exception e) {
+ String msg = "Aborting storage node deployment due to unexpected error while announcing cluster nodes.";
+ log.error(msg, e);
+ storageNodeOperationsHandler.logError(StorageNode.OperationMode.ANNOUNCE, msg, e);
+ }
+ } else if (operationHistory.getOperationDefinition().getName().equals("prepareForBootstrap")) {
+ try {
+ storageNodeOperationsHandler.handlePrepareForBootstrap(resourceOperationHistory);
+ } catch (Exception e) {
+ String msg = "Aborting storage node deployment due to unexpected error while bootstrapping new node.";
+ log.error(msg, e);
+ storageNodeOperationsHandler.logError(StorageNode.OperationMode.BOOTSTRAP, msg, e);
+ }
+ } else if (operationHistory.getOperationDefinition().getName().equals("addNodeMaintenance")) {
+ try {
+ storageNodeOperationsHandler.handleAddNodeMaintenance(resourceOperationHistory);
+ } catch (Exception e) {
+ String msg = "Aborting storage node deployment due to unexpected error while performing add node " +
+ "maintenance.";
+ log.error(msg, e);
+ storageNodeOperationsHandler.logError(StorageNode.OperationMode.ADD_NODE_MAINTENANCE, msg, e);
}
}
+
}
- private void handlePrepareForBootstrap(ResourceOperationHistory resourceOperationHistory) {
+ @Override
+ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+ public void handleUpdateKnownNodes(ResourceOperationHistory resourceOperationHistory) {
+ StorageNode storageNode = findStorageNode(resourceOperationHistory.getResource());
+ StorageNode newStorageNode = null;
+ switch (resourceOperationHistory.getStatus()) {
+ case INPROGRESS:
+ // nothing to do here
+ return;
+ case CANCELED:
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
+ operationCanceled(storageNode, resourceOperationHistory, newStorageNode);
+ case FAILURE:
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
+ operationFailed(storageNode, resourceOperationHistory, newStorageNode);
+ return;
+ default: // SUCCESS
+ Configuration parameters = resourceOperationHistory.getParameters();
+ PropertyList addresses = parameters.getList("addresses");
+ List<String> remainingNodes = getRemainingNodes(resourceOperationHistory);
+
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
+ Subject subject = getSubject(resourceOperationHistory);
+
+ if (remainingNodes.isEmpty()) {
+ log.info("Successfully announced new storage node to storage cluster");
+ newStorageNode.setOperationMode(StorageNode.OperationMode.BOOTSTRAP);
+ prepareNodeForBootstrap(subject, newStorageNode, addresses.deepCopy(false));
+ } else {
+ announceStorageNode(subject, newStorageNode, addresses.deepCopy(false), remainingNodes);
+ }
+ }
+ }
+
+ @Override
+ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+ public void handlePrepareForBootstrap(ResourceOperationHistory resourceOperationHistory) {
StorageNode newStorageNode = findStorageNode(resourceOperationHistory.getResource());
switch (resourceOperationHistory.getStatus()) {
case INPROGRESS:
@@ -195,15 +261,9 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
// If the operation is canceled the plugin will get an InterruptedException.
// The actual bootstrapping may very well complete so we need to add in some
// checks to find out if the node is up and part of the cluster.
-
- log.error("The operation [prepareForBootstrap] was canceled for " + newStorageNode +
- ". Deployment of the new storage node cannot proceed.");
operationCanceled(newStorageNode, resourceOperationHistory);
return;
case FAILURE:
- log.error("The operation [preparedForBootstrap] failed for " + newStorageNode + ". The reported " +
- "failure is: " + resourceOperationHistory.getErrorMessage());
- log.error("Deployment of the new storage node cannot proceed.");
operationFailed(newStorageNode, resourceOperationHistory);
return;
default: // SUCCESS
@@ -212,47 +272,9 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
}
}
- private void handleUpdateKnownNodes(ResourceOperationHistory resourceOperationHistory) {
- StorageNode storageNode = findStorageNode(resourceOperationHistory.getResource());
- StorageNode newStorageNode = null;
- switch (resourceOperationHistory.getStatus()) {
- case INPROGRESS:
- // nothing to do here
- return;
- case CANCELED:
- log.error("The operation [updateKnownNodes] was canceled for " + storageNode +
- ". Deployment of the new storage node cannot proceed.");
- newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
- operationCanceled(storageNode, resourceOperationHistory, newStorageNode);
- case FAILURE:
- log.error("The operation [updateKnownNodes] failed for " + storageNode + ". The reported " +
- "failure is: " + resourceOperationHistory.getErrorMessage());
- log.error("Deployment of the new storage node cannot proceed.");
- newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
- operationFailed(storageNode, resourceOperationHistory, newStorageNode);
- return;
- default: // SUCCESS
- if (log.isInfoEnabled()) {
- log.info("Finished announcing cluster nodes to " + storageNode);
- }
- Configuration parameters = resourceOperationHistory.getParameters();
- PropertyList addresses = parameters.getList("addresses");
- List<String> remainingNodes = getRemainingNodes(resourceOperationHistory);
-
- newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
- Subject subject = getSubject(resourceOperationHistory);
-
- if (remainingNodes.isEmpty()) {
- log.info("Successfully announced new storage node to cluster");
- newStorageNode.setOperationMode(StorageNode.OperationMode.BOOTSTRAP);
- prepareNodeForBootstrap(subject, newStorageNode, addresses.deepCopy(false));
- } else {
- announceStorageNode(subject, newStorageNode, addresses.deepCopy(false), remainingNodes);
- }
- }
- }
-
- private void handleAddNodeMaintenance(ResourceOperationHistory resourceOperationHistory) {
+ @Override
+ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+ public void handleAddNodeMaintenance(ResourceOperationHistory resourceOperationHistory) {
StorageNode storageNode = findStorageNode(resourceOperationHistory.getResource());
StorageNode newStorageNode = null;
switch (resourceOperationHistory.getStatus()) {
@@ -260,21 +282,16 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
// nothing to do here
return;
case CANCELED:
- log.error("The operation [addNodeMaintenance] was canceled for " + storageNode + ". This operation " +
- "needs to be run on each storage node when a new node is added to the cluster.");
newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
operationCanceled(storageNode, resourceOperationHistory, newStorageNode);
return;
case FAILURE:
- log.error("The operation [addNodeMaintenance] failed for " + storageNode + ". This operation " +
- "needs to be run on each storage node when a new node is added to the cluster. The reported " +
- "failure is: " + resourceOperationHistory.getErrorMessage());
newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
operationFailed(storageNode, resourceOperationHistory, newStorageNode);
return;
default: // SUCCESS
if (log.isInfoEnabled()) {
- log.info("Finnished cluster maintenance for " + storageNode + " for addition of new node");
+ log.info("Finished cluster maintenance for " + storageNode + " for addition of new node");
}
storageNode.setOperationMode(StorageNode.OperationMode.NORMAL);
StorageNode nextNode = takeFromQueue(storageNode, StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
@@ -298,6 +315,10 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
private void operationCanceled(StorageNode storageNode, ResourceOperationHistory operationHistory,
StorageNode newStorageNode) {
+ log.error("Deployment has been aborted due to canceled operation [" +
+ operationHistory.getOperationDefinition().getDisplayName() + " on " + storageNode.getResource() +
+ ": " + operationHistory.getErrorMessage());
+
newStorageNode.setErrorMessage("Deployment has been aborted due to canceled resource operation on " +
storageNode.getAddress());
storageNode.setErrorMessage("Deployment of " + newStorageNode.getAddress() + " has been aborted due " +
@@ -307,6 +328,10 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
}
private void operationCanceled(StorageNode newStorageNode, ResourceOperationHistory operationHistory) {
+ log.error("Deployment has been aborted due to canceled operation [" +
+ operationHistory.getOperationDefinition().getDisplayName() + " on " + newStorageNode.getResource() +
+ ": " + operationHistory.getErrorMessage());
+
newStorageNode.setErrorMessage("Deployment has been aborted due to canceled resource operation [" +
operationHistory.getOperationDefinition().getDisplayName() + "].");
newStorageNode.setFailedOperation(operationHistory);
@@ -314,6 +339,10 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
private void operationFailed(StorageNode storageNode, ResourceOperationHistory operationHistory,
StorageNode newStorageNode) {
+ log.error("Deployment has been aborted due to failed operation [" +
+ operationHistory.getOperationDefinition().getDisplayName() + "] on " + storageNode.getResource() +
+ ": " + operationHistory.getErrorMessage());
+
newStorageNode.setErrorMessage("Deployment has been aborted due to failed resource operation on " +
storageNode.getAddress());
storageNode.setErrorMessage("Deployment of " + newStorageNode.getAddress() + " has been aborted due " +
@@ -322,6 +351,10 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
}
private void operationFailed(StorageNode newStorageNode, ResourceOperationHistory operationHistory) {
+ log.error("Deployment has been aborted due to failed operation [" +
+ operationHistory.getOperationDefinition().getDisplayName() + "] on " + newStorageNode.getResource() +
+ ": " + operationHistory.getErrorMessage());
+
newStorageNode.setErrorMessage("Deployment has been aborted due to failed resource operation [" +
operationHistory.getOperationDefinition().getDisplayName() + "].");
newStorageNode.setFailedOperation(operationHistory);
@@ -391,28 +424,21 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
}
private StorageNode findStorageNodeByAddress(String address) {
- try {
- return entityManager.createNamedQuery(StorageNode.QUERY_FIND_BY_ADDRESS, StorageNode.class)
+ return entityManager.createNamedQuery(StorageNode.QUERY_FIND_BY_ADDRESS, StorageNode.class)
.setParameter("address", address).getSingleResult();
-
- } catch (PersistenceException e) {
- throw new StorageNodeDeploymentException("Storage node deployment has failed! Failed to fetch the next " +
- "storage node at " + address + " to be updated.", e);
- }
}
private StorageNode findNewStorgeNode(StorageNode.OperationMode operationMode) {
- try {
- return entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE, StorageNode.class)
+ return entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE, StorageNode.class)
.setParameter("operationMode", operationMode).getSingleResult();
- } catch (PersistenceException e) {
- throw new StorageNodeDeploymentException("Storage node deployment has failed! Failed to fetch the " +
- "storage node to be deployed.", e);
- }
}
- private boolean isStorageNodeOperation(OperationDefinition operationDefinition) {
- ResourceType resourceType = operationDefinition.getResourceType();
+ private boolean isStorageNodeOperation(ResourceOperationHistory operationHistory) {
+ if (operationHistory == null) {
+ return false;
+ }
+
+ ResourceType resourceType = operationHistory.getOperationDefinition().getResourceType();
return resourceType.getName().equals(STORAGE_NODE_TYPE_NAME) &&
resourceType.getPlugin().equals(STORAGE_NODE_PLUGIN_NAME);
}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java
index 44bb842..83b0ce5 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java
@@ -7,6 +7,7 @@ import javax.ejb.Asynchronous;
import org.rhq.core.domain.auth.Subject;
import org.rhq.core.domain.cloud.StorageNode;
import org.rhq.core.domain.operation.OperationHistory;
+import org.rhq.core.domain.operation.ResourceOperationHistory;
/**
* @author John Sanda
@@ -16,6 +17,12 @@ public interface StorageNodeOperationsHandlerLocal {
@Asynchronous
void handleOperationUpdateIfNecessary(OperationHistory operationHistory);
+ void handleUpdateKnownNodes(ResourceOperationHistory operationHistory);
+
+ void handlePrepareForBootstrap(ResourceOperationHistory operationHistory);
+
+ void handleAddNodeMaintenance(ResourceOperationHistory operationHistory);
+
void announceStorageNode(Subject subject, StorageNode storageNode);
void bootstrapStorageNode(Subject subject, StorageNode storageNode);
@@ -23,4 +30,6 @@ public interface StorageNodeOperationsHandlerLocal {
void performAddNodeMaintenanceIfNecessary(InetAddress storageNodeAddress);
void performAddNodeMaintenance(Subject subject, StorageNode storageNode);
+
+ void logError(StorageNode.OperationMode newStorageNodeOperationMode, String error, Exception e);
}
commit 2089a3788c5bc35de0b81a35c99398c43489ef59
Author: John Sanda <jsanda(a)redhat.com>
Date: Wed Aug 14 17:36:23 2013 -0400
capture and log deployment failures that result from failed resource operations
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java
index 34f6381..861e3fa 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java
@@ -182,7 +182,7 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
if (log.isInfoEnabled()) {
log.info("Scheduling cluster maintenance to deploy " + storageNode + " into the storage cluster...");
}
- deployStorageNode(subjectManager.getOverlord(), storageNode.getId());
+ deployStorageNode(subjectManager.getOverlord(), storageNode);
}
} catch (UnknownHostException e) {
throw new RuntimeException("Could not resolve address [" + address + "]. The resource " + resource +
@@ -233,18 +233,21 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
}
@Override
- public void deployStorageNode(Subject subject, int storageNodeId) {
- StorageNode storageNode = entityManager.find(StorageNode.class, storageNodeId);
+ public void deployStorageNode(Subject subject, StorageNode storageNode) {
+ storageNode = entityManager.find(StorageNode.class, storageNode.getId());
switch (storageNode.getOperationMode()) {
case INSTALLED:
case ANNOUNCE:
+ reset();
storageNodeOperationsHandler.announceStorageNode(subject, storageNode);
break;
case BOOTSTRAP:
+ reset();
storageNodeOperationsHandler.bootstrapStorageNode(subject, storageNode);
break;
case ADD_NODE_MAINTENANCE:
+ reset();
storageNodeOperationsHandler.performAddNodeMaintenance(subject, storageNode);
default:
// For any other operation mode, the storage node should already be part of
@@ -253,6 +256,13 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
}
}
+ private void reset() {
+ for (StorageNode storageNode : getStorageNodes()) {
+ storageNode.setErrorMessage(null);
+ storageNode.setFailedOperation(null);
+ }
+ }
+
private List<StorageNode> combine(List<StorageNode> storageNodes, StorageNode storageNode) {
List<StorageNode> newList = new ArrayList<StorageNode>(storageNodes.size() + 1);
newList.addAll(storageNodes);
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java
index e5f4f22..75a795c 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java
@@ -169,5 +169,5 @@ public interface StorageNodeManagerLocal {
StorageNode createStorageNode(Resource resource);
- void deployStorageNode(Subject subject, int storageNodeId);
+ void deployStorageNode(Subject subject, StorageNode storageNode);
}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerRemote.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerRemote.java
index 75ac02b..2255299 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerRemote.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerRemote.java
@@ -96,4 +96,6 @@ public interface StorageNodeManagerRemote {
* @return all storage nodes alerts
*/
PageList<Alert> findAllStorageNodeAlerts(Subject subject, StorageNode storageNode);
+
+ void deployStorageNode(Subject sbubject, StorageNode storageNode);
}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
index f996bf2..aaa54f5 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
@@ -2,10 +2,8 @@ package org.rhq.enterprise.server.storage;
import java.net.InetAddress;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
-import java.util.Set;
import javax.ejb.Asynchronous;
import javax.ejb.EJB;
@@ -23,7 +21,6 @@ import org.rhq.core.domain.auth.Subject;
import org.rhq.core.domain.cloud.StorageNode;
import org.rhq.core.domain.common.JobTrigger;
import org.rhq.core.domain.configuration.Configuration;
-import org.rhq.core.domain.configuration.Property;
import org.rhq.core.domain.configuration.PropertyList;
import org.rhq.core.domain.configuration.PropertySimple;
import org.rhq.core.domain.operation.OperationDefinition;
@@ -194,15 +191,20 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
// nothing to do here
return;
case CANCELED:
+ // TODO Verify whether or not the node has been bootstrapped
+ // If the operation is canceled the plugin will get an InterruptedException.
+ // The actual bootstrapping may very well complete so we need to add in some
+ // checks to find out if the node is up and part of the cluster.
+
log.error("The operation [prepareForBootstrap] was canceled for " + newStorageNode +
". Deployment of the new storage node cannot proceed.");
- // TODO update workflow status (the status needs to be accessible in the UI)
+ operationCanceled(newStorageNode, resourceOperationHistory);
return;
case FAILURE:
log.error("The operation [preparedForBootstrap] failed for " + newStorageNode + ". The reported " +
"failure is: " + resourceOperationHistory.getErrorMessage());
log.error("Deployment of the new storage node cannot proceed.");
- // TODO update workflow status (the status needs to be accessible in the UI)
+ operationFailed(newStorageNode, resourceOperationHistory);
return;
default: // SUCCESS
// Nothing to do because we wait for the C* driver to notify us that the
@@ -212,6 +214,7 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
private void handleUpdateKnownNodes(ResourceOperationHistory resourceOperationHistory) {
StorageNode storageNode = findStorageNode(resourceOperationHistory.getResource());
+ StorageNode newStorageNode = null;
switch (resourceOperationHistory.getStatus()) {
case INPROGRESS:
// nothing to do here
@@ -219,13 +222,14 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
case CANCELED:
log.error("The operation [updateKnownNodes] was canceled for " + storageNode +
". Deployment of the new storage node cannot proceed.");
- // TODO update workflow status (the status needs to be accessible in the UI)
- return;
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
+ operationCanceled(storageNode, resourceOperationHistory, newStorageNode);
case FAILURE:
log.error("The operation [updateKnownNodes] failed for " + storageNode + ". The reported " +
"failure is: " + resourceOperationHistory.getErrorMessage());
log.error("Deployment of the new storage node cannot proceed.");
- // TODO update workflow status (the status needs to be accessible in the UI)
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
+ operationFailed(storageNode, resourceOperationHistory, newStorageNode);
return;
default: // SUCCESS
if (log.isInfoEnabled()) {
@@ -235,7 +239,7 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
PropertyList addresses = parameters.getList("addresses");
List<String> remainingNodes = getRemainingNodes(resourceOperationHistory);
- StorageNode newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
Subject subject = getSubject(resourceOperationHistory);
if (remainingNodes.isEmpty()) {
@@ -248,13 +252,9 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
}
}
- private Subject getSubject(ResourceOperationHistory resourceOperationHistory) {
- Subject subject = subjectManager.getSubjectByName(resourceOperationHistory.getSubjectName());
- return SessionManager.getInstance().put(subject);
- }
-
private void handleAddNodeMaintenance(ResourceOperationHistory resourceOperationHistory) {
StorageNode storageNode = findStorageNode(resourceOperationHistory.getResource());
+ StorageNode newStorageNode = null;
switch (resourceOperationHistory.getStatus()) {
case INPROGRESS:
// nothing to do here
@@ -262,13 +262,15 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
case CANCELED:
log.error("The operation [addNodeMaintenance] was canceled for " + storageNode + ". This operation " +
"needs to be run on each storage node when a new node is added to the cluster.");
- // TODO update workflow status (the status needs to be accessible in the UI)
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
+ operationCanceled(storageNode, resourceOperationHistory, newStorageNode);
return;
case FAILURE:
log.error("The operation [addNodeMaintenance] failed for " + storageNode + ". This operation " +
"needs to be run on each storage node when a new node is added to the cluster. The reported " +
"failure is: " + resourceOperationHistory.getErrorMessage());
- // TODO update workflow status (the status needs to be accessible in the UI)
+ newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
+ operationFailed(storageNode, resourceOperationHistory, newStorageNode);
return;
default: // SUCCESS
if (log.isInfoEnabled()) {
@@ -289,6 +291,42 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
}
}
+ private Subject getSubject(ResourceOperationHistory resourceOperationHistory) {
+ Subject subject = subjectManager.getSubjectByName(resourceOperationHistory.getSubjectName());
+ return SessionManager.getInstance().put(subject);
+ }
+
+ private void operationCanceled(StorageNode storageNode, ResourceOperationHistory operationHistory,
+ StorageNode newStorageNode) {
+ newStorageNode.setErrorMessage("Deployment has been aborted due to canceled resource operation on " +
+ storageNode.getAddress());
+ storageNode.setErrorMessage("Deployment of " + newStorageNode.getAddress() + " has been aborted due " +
+ "to cancellation of resource operation [" + operationHistory.getOperationDefinition().getDisplayName() +
+ "].");
+ storageNode.setFailedOperation(operationHistory);
+ }
+
+ private void operationCanceled(StorageNode newStorageNode, ResourceOperationHistory operationHistory) {
+ newStorageNode.setErrorMessage("Deployment has been aborted due to canceled resource operation [" +
+ operationHistory.getOperationDefinition().getDisplayName() + "].");
+ newStorageNode.setFailedOperation(operationHistory);
+ }
+
+ private void operationFailed(StorageNode storageNode, ResourceOperationHistory operationHistory,
+ StorageNode newStorageNode) {
+ newStorageNode.setErrorMessage("Deployment has been aborted due to failed resource operation on " +
+ storageNode.getAddress());
+ storageNode.setErrorMessage("Deployment of " + newStorageNode.getAddress() + " has been aborted due " +
+ "to failed resource operation [" + operationHistory.getOperationDefinition().getDisplayName() + "].");
+ storageNode.setFailedOperation(operationHistory);
+ }
+
+ private void operationFailed(StorageNode newStorageNode, ResourceOperationHistory operationHistory) {
+ newStorageNode.setErrorMessage("Deployment has been aborted due to failed resource operation [" +
+ operationHistory.getOperationDefinition().getDisplayName() + "].");
+ newStorageNode.setFailedOperation(operationHistory);
+ }
+
private StorageNode findStorageNode(Resource resource) {
for (StorageNode storageNode : storageNodeManager.getStorageNodes()) {
if (storageNode.getResource().getId() == resource.getId()) {
@@ -298,31 +336,6 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
return null;
}
- private StorageNode findStorageNodeToPrepareForBootstrap(PropertyList addressList) {
- // It is possible that we could have more that one INSTALLED node. We want to make
- // sure we grab the one that was just announced to the cluster.
- Set<String> addresses = toSet(addressList);
- List<StorageNode> installedNodes = entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE,
- StorageNode.class).setParameter("operationMode", StorageNode.OperationMode.INSTALLED).getResultList();
-
- for (StorageNode installedNode : installedNodes) {
- if (addresses.contains(installedNode.getAddress())) {
- return installedNode;
- }
- }
- // TODO What should we do in the very unlikely event that we do not find the IP address?
- throw new IllegalStateException("Failed to find storage node to be bootstrapped.");
- }
-
- private Set<String> toSet(PropertyList propertyList) {
- Set<String> set = new HashSet<String>();
- for (Property property : propertyList.getList()) {
- PropertySimple simple = (PropertySimple) property;
- set.add(simple.getStringValue());
- }
- return set;
- }
-
@Override
public void bootstrapStorageNode(Subject subject, StorageNode storageNode) {
List<StorageNode> clusterNodes = entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE,
commit 374e65d9c0712316f5650ffa7d5a069933c6b8b3
Author: John Sanda <jsanda(a)redhat.com>
Date: Wed Aug 14 16:13:18 2013 -0400
add fields in StorageNode for error reporting during maintenance
Storage node deployment and undeployment consist of a series of different
resource operations. The (un)deployment workflow could fail due to one of
those resource operations. When that occurs we can provide a direct link in the
StorageNode.failedOperation field to the operation history of the failed
operation. This direct link will help with providing quick insight into the
cause of the failure.
There is also a new errorMessage field in StorageNode. This field will provide
summary info about the failure. If the failure is in server side processing
and not in a resource operation, then the errorMessage field should be set but
not the failedOperation field.
diff --git a/modules/core/dbutils/src/main/scripts/dbsetup/measurement-schema.xml b/modules/core/dbutils/src/main/scripts/dbsetup/measurement-schema.xml
index c15aa1f..f307ff8 100644
--- a/modules/core/dbutils/src/main/scripts/dbsetup/measurement-schema.xml
+++ b/modules/core/dbutils/src/main/scripts/dbsetup/measurement-schema.xml
@@ -454,7 +454,9 @@
<column name="OPERATION_MODE" required="true" size="32" type="VARCHAR2" />
<column name="CTIME" required="true" type="LONG" />
<column name="MTIME" required="true" type="LONG" />
- <column name="RESOURCE_ID" required="false" type="INTEGER" references="RHQ_RESOURCE(ID)" />
+ <column name="ERROR_MSG" required="false" type="LONGVARCHAR"/>
+ <column name="RESOURCE_ID" required="false" type="INTEGER" references="RHQ_RESOURCE(ID)" />
+ <column name="RESOURCE_OP_HIST_ID" required="false" type="INTEGER" references="RHQ_OPERATION_HISTORY(ID)"/>
<!-- This index is for constraint, not performance -->
<index name="RHQ_STORAGE_NODE_UNIQUE" unique="true">
diff --git a/modules/core/dbutils/src/main/scripts/dbupgrade/db-upgrade.xml b/modules/core/dbutils/src/main/scripts/dbupgrade/db-upgrade.xml
index 6f42345..edf0147 100644
--- a/modules/core/dbutils/src/main/scripts/dbupgrade/db-upgrade.xml
+++ b/modules/core/dbutils/src/main/scripts/dbupgrade/db-upgrade.xml
@@ -2184,6 +2184,19 @@
<schema-alterColumn table="RHQ_CONFIG_PD_OSRC" column="EXPRESSION_SCOPE" nullable="false" default="unlimited"/>
</schemaSpec>
+ <schemaSpec version="2.137">
+ <schema-addColumn table="RHQ_STORAGE_NODE" column="ERROR_MSG" columnType="LONGVARCHAR"/>
+ <schema-addColumn table="RHQ_STORAGE_NODE" column="RESOURCE_OP_HIST_ID" columnType="INTEGER"/>
+ <schema-directSQL>
+ <statement desc="Creating RHQ_STORAGE_NODE foreign key to RHQ_OPERATION_HISTORY">
+ ALTER TABLE RHQ_STORAGE_NODE
+ ADD CONSTRAINT RHQ_SN_OP_HIST_ID_FK
+ FOREIGN KEY (RESOURCE_OP_HIST_ID)
+ REFERENCES RHQ_OPERATION_HISTORY (ID)
+ </statement>
+ </schema-directSQL>
+ </schemaSpec>
+
</dbupgrade>
</target>
</project>
diff --git a/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java b/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java
index 3e94526..3f2a89c 100644
--- a/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java
+++ b/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java
@@ -40,6 +40,7 @@ import javax.persistence.PrePersist;
import javax.persistence.SequenceGenerator;
import javax.persistence.Table;
+import org.rhq.core.domain.operation.ResourceOperationHistory;
import org.rhq.core.domain.resource.Resource;
/**
@@ -134,10 +135,17 @@ public class StorageNode implements Serializable {
@Column(name = "MTIME", nullable = false)
private long mtime;
+ @Column(name = "ERROR_MSG", nullable = true)
+ private String errorMessage;
+
@JoinColumn(name = "RESOURCE_ID", referencedColumnName = "ID", nullable = true)
@OneToOne(fetch = FetchType.EAGER, optional = true)
private Resource resource;
+ @JoinColumn(name = "RESOURCE_OP_HIST_ID", referencedColumnName = "ID", nullable = true)
+ @OneToOne(optional = true)
+ private ResourceOperationHistory failedOperation;
+
// required for JPA
public StorageNode() {
}
@@ -198,6 +206,22 @@ public class StorageNode implements Serializable {
this.resource = resource;
}
+ public String getErrorMessage() {
+ return errorMessage;
+ }
+
+ public void setErrorMessage(String errorMessage) {
+ this.errorMessage = errorMessage;
+ }
+
+ public ResourceOperationHistory getFailedOperation() {
+ return failedOperation;
+ }
+
+ public void setFailedOperation(ResourceOperationHistory failedOperation) {
+ this.failedOperation = failedOperation;
+ }
+
public OperationMode getOperationMode() {
return operationMode;
}
commit 2b73f4d7f00252b9f4ea71fce47f54458cf683b2
Author: John Sanda <jsanda(a)redhat.com>
Date: Wed Aug 14 12:08:13 2013 -0400
refactoring state transitions and adding method for deployment
When a storage node is committed into inventory its operation mode is set to
INSTALLED, unless the storage node entity exists in which case the mode is set
to NORMAL. After creating the storage node entity, deployment is started. The
operation mode changes to ANNOUNCE. The address of the new node is announced to
existing cluster nodes. After announcing completes, the operation mode changes
to BOOTSTRAP, and the prepareForBootstrap operation is run on the new node.
When the new node is reported up as part of the cluster, the operation mode of
all cluster nodes is set to ADD_NODE_MAINTENANCE. The addNodeMaintenance
operation is then run on each storage node. When that operation completes, the
node's operation mode is set back to NORMAL.
The StorageNodeManagerBean.deployStorageNode method looks at the operation mode
of the node to determine at what step in the process to start the deployment.
The deployStorageNode method is the only method that the UI or remote API will
need to invoke to start or resume a deployment.
diff --git a/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java b/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java
index 6a5cf6a..3e94526 100644
--- a/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java
+++ b/modules/core/domain/src/main/java/org/rhq/core/domain/cloud/StorageNode.java
@@ -209,11 +209,13 @@ public class StorageNode implements Serializable {
public enum OperationMode {
DOWN("This storage node is down"), //
- INSTALLED("This storage node is newly installed but not yet operationial"), //
+ INSTALLED("This storage node is newly installed but not yet operational"), //
MAINTENANCE("This storage node is in maintenance mode"), //
NORMAL("This storage node is running normally"),
- ANNOUNCE("The storage node is running normally and is being updated to have newly deployed storage nodes " +
- "announced to it so that those new nodes can join the cluster."),
+ ANNOUNCE("The storage node is installed but not yet part of the cluster. It is being announced so that it " +
+ "can join the cluster."),
+ BOOTSTRAP("The storage is installed but not yet part of the cluster. It is getting bootstrapped into the " +
+ "cluster"),
ADD_NODE_MAINTENANCE("The storage node is running and is preparing to undergo routine maintenance that is " +
"necessary when a new node joins the cluster.");
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java
index 11d81b4..34f6381 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerBean.java
@@ -177,21 +177,12 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
storageNode.setOperationMode(OperationMode.NORMAL);
initClusterSettingsIfNecessary(pluginConfig);
} else {
- storageNode = new StorageNode();
- storageNode.setAddress(address);
- storageNode.setCqlPort(Integer.parseInt(pluginConfig.getSimpleValue(RHQ_STORAGE_CQL_PORT_PROPERTY)));
- storageNode.setJmxPort(Integer.parseInt(pluginConfig.getSimpleValue(RHQ_STORAGE_JMX_PORT_PROPERTY)));
- storageNode.setResource(resource);
- storageNode.setOperationMode(OperationMode.INSTALLED);
-
- entityManager.persist(storageNode);
+ storageNode = createStorageNode(resource);
if (log.isInfoEnabled()) {
- log.info(storageNode + " is a new storage node and not part of the storage node cluster.");
- log.info("Scheduling maintenance operations to bring " + storageNode + " into the cluster...");
+ log.info("Scheduling cluster maintenance to deploy " + storageNode + " into the storage cluster...");
}
-
- announceNewNode(storageNode);
+ deployStorageNode(subjectManager.getOverlord(), storageNode.getId());
}
} catch (UnknownHostException e) {
throw new RuntimeException("Could not resolve address [" + address + "]. The resource " + resource +
@@ -224,17 +215,42 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
storageClusterSettingsManager.setClusterSettings(subjectManager.getOverlord(), clusterSettings);
}
- private void announceNewNode(StorageNode newStorageNode) {
- if (log.isInfoEnabled()) {
- log.info("Announcing " + newStorageNode + " to storage node cluster.");
- }
+ @Override
+ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+ public StorageNode createStorageNode(Resource resource) {
+ Configuration pluginConfig = resource.getPluginConfiguration();
+
+ StorageNode storageNode = new StorageNode();
+ storageNode.setAddress(pluginConfig.getSimpleValue(RHQ_STORAGE_ADDRESS_PROPERTY));
+ storageNode.setCqlPort(Integer.parseInt(pluginConfig.getSimpleValue(RHQ_STORAGE_CQL_PORT_PROPERTY)));
+ storageNode.setJmxPort(Integer.parseInt(pluginConfig.getSimpleValue(RHQ_STORAGE_JMX_PORT_PROPERTY)));
+ storageNode.setResource(resource);
+ storageNode.setOperationMode(OperationMode.INSTALLED);
- List<StorageNode> clusteredNodes = getClusteredStorageNodes();
- for (StorageNode node : clusteredNodes) {
- node.setOperationMode(OperationMode.ANNOUNCE);
+ entityManager.persist(storageNode);
+
+ return storageNode;
+ }
+
+ @Override
+ public void deployStorageNode(Subject subject, int storageNodeId) {
+ StorageNode storageNode = entityManager.find(StorageNode.class, storageNodeId);
+
+ switch (storageNode.getOperationMode()) {
+ case INSTALLED:
+ case ANNOUNCE:
+ storageNodeOperationsHandler.announceStorageNode(subject, storageNode);
+ break;
+ case BOOTSTRAP:
+ storageNodeOperationsHandler.bootstrapStorageNode(subject, storageNode);
+ break;
+ case ADD_NODE_MAINTENANCE:
+ storageNodeOperationsHandler.performAddNodeMaintenance(subject, storageNode);
+ default:
+ // For any other operation mode, the storage node should already be part of
+ // the cluster.
+ // TODO Make sure that the storage node is in fact part of the cluster
}
- PropertyList addresses = createPropertyListOfAddresses("addresses", combine(clusteredNodes, newStorageNode));
- storageNodeOperationsHandler.announceNewStorageNode(newStorageNode, clusteredNodes.get(0), addresses);
}
private List<StorageNode> combine(List<StorageNode> storageNodes, StorageNode storageNode) {
@@ -254,12 +270,6 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
}
@Override
- public boolean isAddNodeMaintenanceInProgress() {
- return !entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE)
- .setParameter("operationMode", OperationMode.ADD_NODE_MAINTENANCE).getResultList().isEmpty();
- }
-
- @Override
@RequiredPermission(Permission.MANAGE_SETTINGS)
public StorageNodeLoadComposite getLoad(Subject subject, StorageNode node, long beginTime, long endTime) {
int resourceId = getResourceIdFromStorageNode(node);
@@ -418,11 +428,6 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
return result;
}
- private List<StorageNode> getClusteredStorageNodes() {
- return entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE, StorageNode.class)
- .setParameter("operationMode", OperationMode.NORMAL).getResultList();
- }
-
@Override
@RequiredPermission(Permission.MANAGE_SETTINGS)
public PageList<StorageNode> findStorageNodesByCriteria(Subject subject, StorageNodeCriteria criteria) {
@@ -796,4 +801,4 @@ public class StorageNodeManagerBean implements StorageNodeManagerLocal, StorageN
return successResultFound;
}
-}
\ No newline at end of file
+}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java
index b5ee7f0..e5f4f22 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/cloud/StorageNodeManagerLocal.java
@@ -167,5 +167,7 @@ public interface StorageNodeManagerLocal {
Map<String, List<MeasurementDataNumericHighLowComposite>> findStorageNodeLoadDataForLast(Subject subject, StorageNode node, long beginTime, long endTime, int numPoints);
- boolean isAddNodeMaintenanceInProgress();
+ StorageNode createStorageNode(Resource resource);
+
+ void deployStorageNode(Subject subject, int storageNodeId);
}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageClusterMonitor.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageClusterMonitor.java
index 734da35..7db95fb 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageClusterMonitor.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageClusterMonitor.java
@@ -7,7 +7,6 @@ import com.datastax.driver.core.exceptions.NoHostAvailableException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.rhq.enterprise.server.cloud.StorageNodeManagerLocal;
import org.rhq.enterprise.server.util.LookupUtil;
import org.rhq.server.metrics.StorageStateListener;
@@ -30,12 +29,8 @@ public class StorageClusterMonitor implements StorageStateListener {
isClusterAvailable = true;
- StorageNodeManagerLocal storageNodeManager = LookupUtil.getStorageNodeManager();
- if (storageNodeManager.isAddNodeMaintenanceInProgress()) {
- log.info("Scheduling cluster maintenance...");
- StorageNodeOperationsHandlerLocal storageOperationsHandler = LookupUtil.getStorageNodeOperationsHandler();
- storageOperationsHandler.performAddNodeMaintenance(address);
- }
+ StorageNodeOperationsHandlerLocal storageOperationsHandler = LookupUtil.getStorageNodeOperationsHandler();
+ storageOperationsHandler.performAddNodeMaintenanceIfNecessary(address);
}
@Override
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeDeploymentException.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeDeploymentException.java
new file mode 100644
index 0000000..fca6e96
--- /dev/null
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeDeploymentException.java
@@ -0,0 +1,22 @@
+package org.rhq.enterprise.server.storage;
+
+/**
+ * @author John Sanda
+ */
+public class StorageNodeDeploymentException extends RuntimeException {
+
+ public StorageNodeDeploymentException() {
+ }
+
+ public StorageNodeDeploymentException(String message) {
+ super(message);
+ }
+
+ public StorageNodeDeploymentException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public StorageNodeDeploymentException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
index 1bf3cec..f996bf2 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerBean.java
@@ -1,15 +1,20 @@
package org.rhq.enterprise.server.storage;
import java.net.InetAddress;
+import java.util.ArrayList;
import java.util.HashSet;
+import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import javax.ejb.Asynchronous;
import javax.ejb.EJB;
import javax.ejb.Stateless;
+import javax.ejb.TransactionAttribute;
+import javax.ejb.TransactionAttributeType;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
+import javax.persistence.PersistenceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -29,6 +34,7 @@ import org.rhq.core.domain.resource.Resource;
import org.rhq.core.domain.resource.ResourceType;
import org.rhq.core.util.StringUtil;
import org.rhq.enterprise.server.RHQConstants;
+import org.rhq.enterprise.server.auth.SessionManager;
import org.rhq.enterprise.server.auth.SubjectManagerLocal;
import org.rhq.enterprise.server.cloud.StorageNodeManagerLocal;
import org.rhq.enterprise.server.operation.OperationManagerLocal;
@@ -69,39 +75,72 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
private StorageClientManagerBean storageClientManager;
@Override
- public void announceNewStorageNode(StorageNode newStorageNode, StorageNode clusterNode, PropertyList addresses) {
+ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+ public void announceStorageNode(Subject subject, StorageNode storageNode) {
if (log.isInfoEnabled()) {
- log.info("Announcing new storage node " + newStorageNode + " to cluster node " + clusterNode);
+ log.info("Announcing " + storageNode + " to storage node cluster.");
+ }
+ storageNode.setOperationMode(StorageNode.OperationMode.ANNOUNCE);
+ List<StorageNode> clusterNodes = entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE,
+ StorageNode.class).setParameter("operationMode", StorageNode.OperationMode.NORMAL).getResultList();
+ List<StorageNode> allNodes = new ArrayList<StorageNode>(clusterNodes);
+ allNodes.add(storageNode);
+
+ announceStorageNode(subject, storageNode, createPropertyListOfAddresses("addresses", allNodes),
+ getAddresses(clusterNodes));
+
+ }
+
+ private void announceStorageNode(Subject subject, StorageNode storageNode, PropertyList addresses,
+ List<String> remainingNodes) {
+ String address = remainingNodes.remove(0);
+ StorageNode clusterNode = findStorageNodeByAddress(address);
+
+ if (log.isInfoEnabled()) {
+ log.info("Announcing " + storageNode + " to cluster node " + clusterNode);
}
- Subject overlord = subjectManager.getOverlord();
ResourceOperationSchedule schedule = new ResourceOperationSchedule();
schedule.setResource(clusterNode.getResource());
schedule.setJobTrigger(JobTrigger.createNowTrigger());
- schedule.setSubject(overlord);
+ schedule.setSubject(subject);
schedule.setOperationName("updateKnownNodes");
Configuration parameters = new Configuration();
parameters.put(addresses);
+ parameters.put(new PropertySimple("remainingNodes", StringUtil.listToString(remainingNodes)));
schedule.setParameters(parameters);
- operationManager.scheduleResourceOperation(overlord, schedule);
+ operationManager.scheduleResourceOperation(subject, schedule);
}
@Override
- public void performAddNodeMaintenance(InetAddress storageNodeAddress) {
+ public void performAddNodeMaintenanceIfNecessary(InetAddress storageNodeAddress) {
StorageNode storageNode = entityManager.createNamedQuery(StorageNode.QUERY_FIND_BY_ADDRESS,
StorageNode.class).setParameter("address", storageNodeAddress.getHostAddress()).getSingleResult();
- storageNode.setOperationMode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
+ if (storageNode.getOperationMode() == StorageNode.OperationMode.BOOTSTRAP) {
+ performAddNodeMaintenance(subjectManager.getOverlord(), storageNode);
+ } else {
+ log.info(storageNode + " has already been bootstrapped. Skipping add node maintenance.");
+ }
+ }
+
+ @Override
+ public void performAddNodeMaintenance(Subject subject, StorageNode storageNode) {
+ storageNode.setOperationMode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
List<StorageNode> clusterNodes = entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE,
- StorageNode.class).setParameter("operationMode", StorageNode.OperationMode.ADD_NODE_MAINTENANCE)
+ StorageNode.class).setParameter("operationMode", StorageNode.OperationMode.NORMAL)
.getResultList();
-
+ for (StorageNode node : clusterNodes) {
+ node.setOperationMode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
+ }
+ clusterNodes.add(storageNode);
boolean runRepair = updateSchemaIfNecessary(clusterNodes);
-
- performAddNodeMaintenance(storageNode, runRepair, createPropertyListOfAddresses(SEEDS_LIST, clusterNodes));
+ performAddNodeMaintenance(subject, storageNode, runRepair, createPropertyListOfAddresses(SEEDS_LIST,
+ clusterNodes));
}
- private void performAddNodeMaintenance(StorageNode storageNode, boolean runRepair, PropertyList seedsList) {
+ private void performAddNodeMaintenance(Subject subject, StorageNode storageNode, boolean runRepair,
+ PropertyList seedsList) {
if (log.isInfoEnabled()) {
log.info("Running addNodeMaintenance for storage node " + storageNode);
}
@@ -111,7 +150,7 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
ResourceOperationSchedule schedule = new ResourceOperationSchedule();
schedule.setResource(storageNode.getResource());
schedule.setJobTrigger(JobTrigger.createNowTrigger());
- schedule.setSubject(overlord);
+ schedule.setSubject(subject);
schedule.setOperationName("addNodeMaintenance");
Configuration config = new Configuration();
@@ -192,22 +231,28 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
if (log.isInfoEnabled()) {
log.info("Finished announcing cluster nodes to " + storageNode);
}
- storageNode.setOperationMode(StorageNode.OperationMode.ADD_NODE_MAINTENANCE);
Configuration parameters = resourceOperationHistory.getParameters();
PropertyList addresses = parameters.getList("addresses");
- StorageNode nextNode = takeFromQueue(storageNode, StorageNode.OperationMode.ANNOUNCE);
+ List<String> remainingNodes = getRemainingNodes(resourceOperationHistory);
- if (nextNode == null) {
+ StorageNode newStorageNode = findNewStorgeNode(StorageNode.OperationMode.ANNOUNCE);
+ Subject subject = getSubject(resourceOperationHistory);
+
+ if (remainingNodes.isEmpty()) {
log.info("Successfully announced new storage node to cluster");
- StorageNode installedNode = findStorageNodeToPrepareForBootstrap(addresses);
- // Pass a copy of addresses to avoid a TransientObjectException
- prepareNodeForBootstrap(installedNode, addresses.deepCopy(false));
+ newStorageNode.setOperationMode(StorageNode.OperationMode.BOOTSTRAP);
+ prepareNodeForBootstrap(subject, newStorageNode, addresses.deepCopy(false));
} else {
- announceNewStorageNode(storageNode, nextNode, addresses.deepCopy(false));
+ announceStorageNode(subject, newStorageNode, addresses.deepCopy(false), remainingNodes);
}
}
}
+ private Subject getSubject(ResourceOperationHistory resourceOperationHistory) {
+ Subject subject = subjectManager.getSubjectByName(resourceOperationHistory.getSubjectName());
+ return SessionManager.getInstance().put(subject);
+ }
+
private void handleAddNodeMaintenance(ResourceOperationHistory resourceOperationHistory) {
StorageNode storageNode = findStorageNode(resourceOperationHistory.getResource());
switch (resourceOperationHistory.getStatus()) {
@@ -238,7 +283,8 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
Configuration parameters = resourceOperationHistory.getParameters();
boolean runRepair = parameters.getSimple(RUN_REPAIR_PROPERTY).getBooleanValue();
PropertyList seedsList = parameters.getList(SEEDS_LIST).deepCopy(false);
- performAddNodeMaintenance(nextNode, runRepair, seedsList);
+ Subject subject = getSubject(resourceOperationHistory);
+ performAddNodeMaintenance(subject, nextNode, runRepair, seedsList);
}
}
}
@@ -277,7 +323,15 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
return set;
}
- private void prepareNodeForBootstrap(StorageNode storageNode, PropertyList addresses) {
+ @Override
+ public void bootstrapStorageNode(Subject subject, StorageNode storageNode) {
+ List<StorageNode> clusterNodes = entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE,
+ StorageNode.class).setParameter("operationMode", StorageNode.OperationMode.NORMAL).getResultList();
+ clusterNodes.add(storageNode);
+ prepareNodeForBootstrap(subject, storageNode, createPropertyListOfAddresses("addresses", clusterNodes));
+ }
+
+ private void prepareNodeForBootstrap(Subject subject, StorageNode storageNode, PropertyList addresses) {
if (log.isInfoEnabled()) {
log.info("Preparing to bootstrap " + storageNode + " into cluster...");
}
@@ -285,11 +339,10 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
ResourceOperationSchedule schedule = new ResourceOperationSchedule();
schedule.setResource(storageNode.getResource());
schedule.setJobTrigger(JobTrigger.createNowTrigger());
- schedule.setSubject(subjectManager.getOverlord());
+ schedule.setSubject(subject);
schedule.setOperationName("prepareForBootstrap");
- StorageClusterSettings clusterSettings = storageClusterSettingsManager.getClusterSettings(
- subjectManager.getOverlord());
+ StorageClusterSettings clusterSettings = storageClusterSettingsManager.getClusterSettings(subject);
Configuration parameters = new Configuration();
parameters.put(new PropertySimple("cqlPort", clusterSettings.getCqlPort()));
parameters.put(new PropertySimple("gossipPort", clusterSettings.getGossipPort()));
@@ -297,7 +350,7 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
schedule.setParameters(parameters);
- operationManager.scheduleResourceOperation(subjectManager.getOverlord(), schedule);
+ operationManager.scheduleResourceOperation(subject, schedule);
}
private StorageNode takeFromQueue(StorageNode lastTaken, StorageNode.OperationMode queue) {
@@ -311,6 +364,40 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
return nodes.get(0);
}
+ private List<String> getRemainingNodes(ResourceOperationHistory resourceOperationHistory) {
+ LinkedList<String> addresses = new LinkedList<String>();
+ Configuration results = resourceOperationHistory.getResults();
+ String remainingNodes = results.getSimpleValue("remainingNodes");
+
+ if (!StringUtil.isEmpty(remainingNodes)) {
+ for (String address : remainingNodes.split(",")) {
+ addresses.add(address);
+ }
+ }
+ return addresses;
+ }
+
+ private StorageNode findStorageNodeByAddress(String address) {
+ try {
+ return entityManager.createNamedQuery(StorageNode.QUERY_FIND_BY_ADDRESS, StorageNode.class)
+ .setParameter("address", address).getSingleResult();
+
+ } catch (PersistenceException e) {
+ throw new StorageNodeDeploymentException("Storage node deployment has failed! Failed to fetch the next " +
+ "storage node at " + address + " to be updated.", e);
+ }
+ }
+
+ private StorageNode findNewStorgeNode(StorageNode.OperationMode operationMode) {
+ try {
+ return entityManager.createNamedQuery(StorageNode.QUERY_FIND_ALL_BY_MODE, StorageNode.class)
+ .setParameter("operationMode", operationMode).getSingleResult();
+ } catch (PersistenceException e) {
+ throw new StorageNodeDeploymentException("Storage node deployment has failed! Failed to fetch the " +
+ "storage node to be deployed.", e);
+ }
+ }
+
private boolean isStorageNodeOperation(OperationDefinition operationDefinition) {
ResourceType resourceType = operationDefinition.getResourceType();
return resourceType.getName().equals(STORAGE_NODE_TYPE_NAME) &&
@@ -411,4 +498,12 @@ public class StorageNodeOperationsHandlerBean implements StorageNodeOperationsHa
return list;
}
+ private List<String> getAddresses(List<StorageNode> storageNodes) {
+ List<String> addresses = new LinkedList<String>();
+ for (StorageNode storageNode : storageNodes) {
+ addresses.add(storageNode.getAddress());
+ }
+ return addresses;
+ }
+
}
diff --git a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java
index fcdcd3e..44bb842 100644
--- a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java
+++ b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/storage/StorageNodeOperationsHandlerLocal.java
@@ -4,8 +4,8 @@ import java.net.InetAddress;
import javax.ejb.Asynchronous;
+import org.rhq.core.domain.auth.Subject;
import org.rhq.core.domain.cloud.StorageNode;
-import org.rhq.core.domain.configuration.PropertyList;
import org.rhq.core.domain.operation.OperationHistory;
/**
@@ -16,7 +16,11 @@ public interface StorageNodeOperationsHandlerLocal {
@Asynchronous
void handleOperationUpdateIfNecessary(OperationHistory operationHistory);
- void announceNewStorageNode(StorageNode newStorageNode, StorageNode clusterNode, PropertyList addresses);
+ void announceStorageNode(Subject subject, StorageNode storageNode);
- void performAddNodeMaintenance(InetAddress storageNodeAddress);
+ void bootstrapStorageNode(Subject subject, StorageNode storageNode);
+
+ void performAddNodeMaintenanceIfNecessary(InetAddress storageNodeAddress);
+
+ void performAddNodeMaintenance(Subject subject, StorageNode storageNode);
}
diff --git a/modules/plugins/rhq-storage/src/main/java/org/rhq/plugins/storage/StorageNodeComponent.java b/modules/plugins/rhq-storage/src/main/java/org/rhq/plugins/storage/StorageNodeComponent.java
index e76cfa0..7f58037 100644
--- a/modules/plugins/rhq-storage/src/main/java/org/rhq/plugins/storage/StorageNodeComponent.java
+++ b/modules/plugins/rhq-storage/src/main/java/org/rhq/plugins/storage/StorageNodeComponent.java
@@ -255,7 +255,9 @@ public class StorageNodeComponent extends CassandraNodeComponent implements Oper
EmsOperation emsOperation = authBean.getOperation("reloadConfiguration");
emsOperation.invoke();
- result.setSimpleResult("Successfully updated the set of known nodes.");
+ Configuration complexResults = result.getComplexResults();
+ complexResults.put(new PropertySimple("details", "Successfully updated the set of known nodes."));
+ complexResults.put(params.get("remainingNodes").deepCopy(false));
return result;
} catch (InternodeAuthConfUpdateException e) {
diff --git a/modules/plugins/rhq-storage/src/main/resources/META-INF/rhq-plugin.xml b/modules/plugins/rhq-storage/src/main/resources/META-INF/rhq-plugin.xml
index a2d04d0..4ef10cd 100644
--- a/modules/plugins/rhq-storage/src/main/resources/META-INF/rhq-plugin.xml
+++ b/modules/plugins/rhq-storage/src/main/resources/META-INF/rhq-plugin.xml
@@ -102,7 +102,12 @@
<c:list-property name="addresses">
<c:simple-property name="address"/>
</c:list-property>
+ <c:simple-property name="remainingNodes"/>
</parameters>
+ <results>
+ <c:simple-property name="details"/>
+ <c:simple-property name="remainingNodes"/>
+ </results>
</operation>
<operation name="prepareForBootstrap">