modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java
| 124 +++++++---
modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java
| 87 +++++--
2 files changed, 153 insertions(+), 58 deletions(-)
New commits:
commit fdc1cabee3ea72a167f71971a7d4c8f324b8e8db
Author: Jay Shaughnessy <jshaughn(a)redhat.com>
Date: Fri Oct 25 17:47:23 2013 -0400
Bug 1022620 - Windows 2008 - Upgrade to JON3.2.ER3 fails when using oracle jdk7-32b
Second commit for this.
Went one step further. Now for the server and agent we create/set java.io.tmpdir
to InstallDir/temp, if the original java.io.tmpdir is invalid. We'll generate
a warning about the original setting. We'll only exit if our attempt to
create/set a local temp dir fails. This helps us succeed out of the box with
the known issues using the 32-bit JVM on Windows, as well as getting
around the issue in general.
diff --git
a/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java
b/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java
index 77c88b1..c99f4b6 100644
--- a/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java
+++ b/modules/enterprise/agent/src/main/java/org/rhq/enterprise/agent/AgentMain.java
@@ -368,7 +368,7 @@ public class AgentMain {
private VMHealthCheckThread m_vmHealthCheckThread;
/**
- * Counts the number of times the agent has been restarted and holds the reason for
the last restart.
+ * Counts the number of times the agent has been restarted and holds the reason for
the last restart.
*/
private final AgentRestartCounter m_agentRestartCounter = new AgentRestartCounter();
@@ -378,7 +378,7 @@ public class AgentMain {
private boolean m_disableNativeSystem;
/**
- * Thread used to repeatedly ping the server for connectivity, agent avail update,
and clock sync
+ * Thread used to repeatedly ping the server for connectivity, agent avail update,
and clock sync
*/
private ScheduledThreadPoolExecutor m_pingThreadPoolExecutor;
@@ -465,6 +465,62 @@ public class AgentMain {
return;
}
+    /**
+     * Verifies that the directory named by the java.io.tmpdir system property exists, is a
+     * directory, and is readable, executable and writable. On the first check that fails, a
+     * warning naming the problem is logged and {@link #useLocalTmpDir()} is called to switch
+     * to an alternate temp directory.
+     */
+    private void checkTempDir() {
+        final File configuredTmpDir = new File(System.getProperty("java.io.tmpdir"));
+
+        // Checks run in the same order as before; only the first failure is reported.
+        final String problem;
+        if (!configuredTmpDir.exists()) {
+            problem = "does not exist.";
+        } else if (!configuredTmpDir.isDirectory()) {
+            problem = "is not a directory";
+        } else if (!(configuredTmpDir.canRead() && configuredTmpDir.canExecute())) {
+            problem = "is not readable";
+        } else if (!configuredTmpDir.canWrite()) {
+            problem = "is not writable";
+        } else {
+            // The configured temp dir is usable; nothing to do.
+            return;
+        }
+
+        LOG.warn("Invalid java.io.tmpdir: [" + configuredTmpDir.getAbsolutePath() + "] " + problem);
+        useLocalTmpDir();
+    }
+
+    /**
+     * Points java.io.tmpdir at a "temp" directory under the agent home directory, creating
+     * that directory first if it does not exist. The resulting directory is then validated.
+     *
+     * @throws RuntimeException if the directory cannot be created or the property cannot be set,
+     *                          or if the directory does not exist, is not a directory, or is not
+     *                          readable, executable and writable
+     */
+    private void useLocalTmpDir() {
+        File localTmpDir = null;
+        try {
+            localTmpDir = new File(new File(getAgentHomeDirectory()), "temp");
+            LOG.info("Using alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]");
+            if (!localTmpDir.exists()) {
+                LOG.info("Creating alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]");
+                // The return value is not checked here; a failed mkdir is caught by the
+                // exists() validation below.
+                localTmpDir.mkdir();
+            }
+            System.setProperty("java.io.tmpdir", localTmpDir.getAbsolutePath());
+        } catch (Throwable t) {
+            // localTmpDir is still null if resolving the agent home directory failed. Guard the
+            // dereference so an NPE raised here does not replace the real cause.
+            String attemptedPath = (localTmpDir != null) ? localTmpDir.getAbsolutePath() : "unknown";
+            throw new RuntimeException("Startup failed: Could not create or set local java.io.tmpdir ["
+                + attemptedPath + "]", t);
+        }
+        if (!localTmpDir.exists()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] does not exist");
+        }
+        if (!localTmpDir.isDirectory()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] is not a directory");
+        }
+        if (!localTmpDir.canRead() || !localTmpDir.canExecute()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] is not readable");
+        }
+        if (!localTmpDir.canWrite()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] is not writable");
+        }
+    }
+
/**
* Constructor for {@link AgentMain} that loads the agent configuration and prepare
some additional internal data.
*
@@ -517,13 +573,15 @@ public class AgentMain {
prepareNativeSystem();
+ checkTempDir();
+
return;
}
/**
* Returns the directory that is considered the "agent home" (i.e. the
directory
* where the agent is installed).
- *
+ *
* @return agent home directory, or empty string if it cannot be determined
*/
public String getAgentHomeDirectory() {
@@ -614,7 +672,7 @@ public class AgentMain {
/**
* This method should be called whenever the server time is known. This helps
* keep the {@link #getAgentServerClockDifference()} up-to-date.
- *
+ *
* @param serverTime the currently know value of the server clock (epoch millis)
*/
public void serverClockNotification(long serverTime) {
@@ -920,7 +978,7 @@ public class AgentMain {
/**
* This will enable/disable agent-server communication tracing. This is for
* use mainly in development but can also be used for troubleshooting problems.
- *
+ *
* @param enabled whether or not to turn on agent comm tracing
*/
public void agentServerCommunicationsTrace(boolean enabled) {
@@ -952,7 +1010,7 @@ public class AgentMain {
* This will hot-deploy a new log4j log configuration file. Use this to change, at
runtime,
* the log settings so you can, for example, begin logging DEBUG messages to help
troubleshoot
* problems.
- *
+ *
* @param logFilePath the path to the log file - relative to the classloader or
filesystem
*
* @throws Exception if failed to hot deploy the new log config
@@ -985,7 +1043,7 @@ public class AgentMain {
/**
* Returns an iteratable list of servers that can be used as backups when this agent
needs to failover
* to another server.
- *
+ *
* @return list of servers (may be empty but will not be
<code>null</code>)
*/
public FailoverListComposite getServerFailoverList() {
@@ -1029,8 +1087,8 @@ public class AgentMain {
/**
* Downloads a new server failover list from the server and returns the failover
list
* that is now in effect.
- * @return
- *
+ * @return
+ *
* @return the server failover list that is now in effect
*/
public FailoverListComposite downloadServerFailoverList() {
@@ -1568,7 +1626,7 @@ public class AgentMain {
* @param wait_ms maximum number of milliseconds to wait
*
* @return <code>true</code> if the server is up,
<code>false</code> if it is not yet up or the agent has shutdown
- *
+ *
* @throws AgentNotSupportedException If the server is up but it told us we are the
wrong version, then this is thrown.
* When this is thrown, the agent is currently in
the midst of updating itself.
*/
@@ -1940,9 +1998,9 @@ public class AgentMain {
* port and transport parameters being used to talk to the current server
* will stay the same. Otherwise, it will be assumed the server is a
* full endpoint URL.
- *
+ *
* @param server the host of the server to switch to, or a full server endpoint URL
- *
+ *
* @return <code>true</code> if successfully switched,
<code>false</code> otherwise
*/
public boolean switchToServer(String server) {
@@ -1996,9 +2054,9 @@ public class AgentMain {
/**
* Switches the agent to talk to the next server in the failover list.
- *
+ *
* This is package-scoped so the failover callback can call this.
- *
+ *
* @param comm the communicator object whose endpoint needs to be switched to the
next server
* the caller must ensure the remote communicator provided to this method
is the
* same communicator used by this agent's {@link
#getClientCommandSender() sender}.
@@ -2036,12 +2094,12 @@ public class AgentMain {
/**
* Immediately switches the given communicator to the given server.
- *
+ *
* @param comm the communicator whose server is switched
* @param newServer the endpoint of the new server
* @param transport the transport that should be used in the new remote endpoint URL
* @param transportParams the transport params that should be used in the new remote
endpoint URL
- *
+ *
* @return <code>true</code> if successfully switched;
<code>false</code> otherwise
*/
private boolean switchCommServer(RemoteCommunicator comm, ServerEntry newServer,
String transport,
@@ -2096,12 +2154,12 @@ public class AgentMain {
* is making the server its primary server and will begin sending it messages. The
request
* is sent such that the communicator's initialize callback will never be
invoked, however,
* the caller can ask for the request to attempt failover.
- *
+ *
* <p>This is package scoped so the initialize callback can call
this</p>
- *
+ *
* @param comm the communicator used to send the message to the server
* @param attemptFailover if <code>true</code>, and the connect command
fails, server failover will be attempted
- *
+ *
* @throws Throwable
*/
void sendConnectRequestToServer(RemoteCommunicator comm, boolean attemptFailover)
throws Throwable {
@@ -2203,7 +2261,7 @@ public class AgentMain {
/**
* Returns the agent restart counter object.
- *
+ *
* @return the agent restart counter
*/
public AgentRestartCounter getAgentRestartCounter() {
@@ -2351,7 +2409,7 @@ public class AgentMain {
m_commServices.start(m_configuration.getPreferences(),
m_configuration.getClientCommandSenderConfiguration());
// prime the sender so it can be prepared to start sending messages.
- // if auto-discovery is enabled, then the auto-discovery listener will tell the
sender when its OK to start
+ // if auto-discovery is enabled, then the auto-discovery listener will tell the
sender when its OK to start
// sending. Otherwise start polling and let the poller tell the sender when it
is ok to start sending.
if (!isAutoDiscoveryEnabled()) {
LOG.info(AgentI18NResourceKeys.NO_AUTO_DETECT);
@@ -2402,7 +2460,7 @@ public class AgentMain {
/**
* This will prepare the auto-discovery listener, if server auto-detection is
enabled.
- *
+ *
* @throws Exception
*/
private void prepareAutoDiscoveryListener() throws Exception {
@@ -2638,17 +2696,17 @@ public class AgentMain {
/**
* Creates a raw remote communicator that can talk to the given endpoint.
- *
+ *
* This is public-scoped so the {@link PrimaryServerSwitchoverThread} can use this
* and the {@link IdentifyPromptCommand} can use this.
- *
+ *
* @param transport
* @param address
* @param port
* @param transportParams
- *
+ *
* @return the remote communicator
- *
+ *
* @throws Exception if the communicator could not be created
*/
public RemoteCommunicator createServerRemoteCommunicator(String transport, String
address, int port,
@@ -2741,7 +2799,7 @@ public class AgentMain {
* Given a failover list, this makes very rudimentary connection attempts to each
server to see if
* this agent can at least reach the server endpoints. If an endpoint cannot be
reached,
* a warning is logged.
- *
+ *
* @param failoverList the list of servers this agent will potentially need to talk
to.
* @return the servers that failed to be connected to
*/
@@ -2804,7 +2862,7 @@ public class AgentMain {
* Given a failover list, this will persist it so the agent can recover it if the
agent itself fails.
* If this method fails to persist the list, an error is logged but otherwise this
method
* returns normally.
- *
+ *
* @param failoverList the failover list to persist (may be
<code>null</code>)
*/
private void storeServerFailoverList(FailoverListComposite failoverList) {
@@ -3184,7 +3242,7 @@ public class AgentMain {
}
if (m_daemonMode) {
-
AgentInputReaderFactory.setConsoleType(AgentInputReaderFactory.ConsoleType.java); //
don't use native libs, no need and jline causes problems
+
AgentInputReaderFactory.setConsoleType(AgentInputReaderFactory.ConsoleType.java); //
don't use native libs, no need and jline causes problems
} else if (console_type != null) {
AgentInputReaderFactory.setConsoleType(console_type);
}
@@ -3373,7 +3431,7 @@ public class AgentMain {
return args.toArray(new String[args.size()]);
}
- // perform any other massaging
+ // perform any other massaging
private String safeArg(String arg) {
// remove trailing '=' from long option args. For example --plugin=
should just be --plugin for
// downstream processing.
@@ -3642,7 +3700,7 @@ public class AgentMain {
* </ol>
* By restarting the plugin container in such conditions, we essentially re-run the
resource upgrade
* and let the plugin container try to re-merge with the server that we know has just
connected.
- *
+ *
* @author Lukas Krejci
*/
private class PluginContainerConditionalRestartListener implements
ClientCommandSenderStateListener {
@@ -3750,7 +3808,7 @@ public class AgentMain {
try {
// if we can't send to the server ignore the ping
if (!m_clientSender.isSending()) {
- // An unlikely state, but if we're not sending, not polling and
not performing autoDiscovery
+ // An unlikely state, but if we're not sending, not polling and
not performing autoDiscovery
// (multicast), then start polling to we eventually get out of this
state.
if (!(m_clientSender.isServerPolling() || isAutoDiscoveryEnabled()))
{
LOG.info(AgentI18NResourceKeys.PING_EXECUTOR_STARTING_POLLING);
@@ -3760,7 +3818,7 @@ public class AgentMain {
return;
}
- // we are in sending mode, so make sure the poller is off
+ // we are in sending mode, so make sure the poller is off
if (m_clientSender.isServerPolling()) {
LOG.info(AgentI18NResourceKeys.PING_EXECUTOR_STOPPING_POLLING_RESUME_PING);
m_clientSender.stopServerPolling();
diff --git
a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java
b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java
index d5878c6..941ae93 100644
---
a/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java
+++
b/modules/enterprise/server/jar/src/main/java/org/rhq/enterprise/server/core/StartupBean.java
@@ -236,20 +236,56 @@ public class StartupBean implements StartupLocal {
private void checkTempDir() {
File tmpDir = new File(System.getProperty("java.io.tmpdir"));
if (!tmpDir.exists()) {
- throw new RuntimeException("Startup failed: java.io.tmpdir '" +
tmpDir.getAbsolutePath()
- + "' does not exist");
+ log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() +
"] does not exist.");
+ useLocalTmpDir();
+ return;
}
if (!tmpDir.isDirectory()) {
- throw new RuntimeException("Startup failed: java.io.tmpdir '" +
tmpDir.getAbsolutePath()
- + "' is not a directory");
+ log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() +
"] is not a directory");
+ useLocalTmpDir();
+ return;
}
if (!tmpDir.canRead() || !tmpDir.canExecute()) {
- throw new RuntimeException("Startup failed: java.io.tmpdir '" +
tmpDir.getAbsolutePath()
- + "' is not readable");
+ log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() +
"] is not readable");
+ useLocalTmpDir();
+ return;
}
if (!tmpDir.canWrite()) {
- throw new RuntimeException("Startup failed: java.io.tmpdir '" +
tmpDir.getAbsolutePath()
- + "' is not writable");
+ log.warn("Invalid java.io.tmpdir: [" + tmpDir.getAbsolutePath() +
"] is not writable");
+ useLocalTmpDir();
+ return;
+ }
+ }
+
+    /**
+     * Points java.io.tmpdir at a "temp" directory under the server install directory, creating
+     * that directory first if it does not exist. The resulting directory is then validated.
+     *
+     * @throws RuntimeException if the directory cannot be created or the property cannot be set,
+     *                          or if the directory does not exist, is not a directory, or is not
+     *                          readable, executable and writable
+     */
+    private void useLocalTmpDir() {
+        File localTmpDir = null;
+        try {
+            localTmpDir = new File(LookupUtil.getCoreServer().getInstallDir(), "temp");
+            log.info("Using alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]");
+            if (!localTmpDir.exists()) {
+                log.info("Creating alternate java.io.tmpdir: [" + localTmpDir.getAbsolutePath() + "]");
+                // The return value is not checked here; a failed mkdir is caught by the
+                // exists() validation below.
+                localTmpDir.mkdir();
+            }
+            System.setProperty("java.io.tmpdir", localTmpDir.getAbsolutePath());
+        } catch (Throwable t) {
+            // localTmpDir is still null if looking up the install directory failed. Guard the
+            // dereference so an NPE raised here does not replace the real cause.
+            String attemptedPath = (localTmpDir != null) ? localTmpDir.getAbsolutePath() : "unknown";
+            throw new RuntimeException("Startup failed: Could not create or set local java.io.tmpdir ["
+                + attemptedPath + "]", t);
+        }
+        if (!localTmpDir.exists()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] does not exist");
+        }
+        if (!localTmpDir.isDirectory()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] is not a directory");
+        }
+        if (!localTmpDir.canRead() || !localTmpDir.canExecute()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] is not readable");
+        }
+        if (!localTmpDir.canWrite()) {
+            throw new RuntimeException("Startup failed: local java.io.tmpdir [" + localTmpDir.getAbsolutePath()
+                + "] is not writable");
         }
     }
@@ -546,19 +582,19 @@ public class StartupBean implements StartupLocal {
}
}
- /**
- * This seeds the agent clients cache with clients for all known agents. These
clients will be started so they can
- * immediately begin to send any persisted guaranteed messages that might already
exist. This method must be called
- * at a time when the server is ready to accept messages from agents because any
guaranteed messages that are
- * delivered might trigger the agents to send messages back to the server.
- *
- * NOTE: we don't need to do this - so far, none of the messages the server sends
to the agent are marked
- * with "guaranteed delivery" (this is on purpose and a good thing) so we
don't need to start all the agent clients
- * in case they have persisted messages. Since the number of agents could be large
this cache could be huge and
- * take some time to initialize. If we don't call this, it speeds up start up,
and doesn't bloat memory with
- * clients we might not ever need (since agents might have affinity to other
servers). Agent clients
- * can be created lazily at runtime when the server needs it.
- */
+ /**
+ * This seeds the agent clients cache with clients for all known agents. These
clients will be started so they can
+ * immediately begin to send any persisted guaranteed messages that might already
exist. This method must be called
+ * at a time when the server is ready to accept messages from agents because any
guaranteed messages that are
+ * delivered might trigger the agents to send messages back to the server.
+ *
+ * NOTE: we don't need to do this - so far, none of the messages the server
sends to the agent are marked
+ * with "guaranteed delivery" (this is on purpose and a good thing) so we
don't need to start all the agent clients
+ * in case they have persisted messages. Since the number of agents could be large
this cache could be huge and
+ * take some time to initialize. If we don't call this, it speeds up start up,
and doesn't bloat memory with
+ * clients we might not ever need (since agents might have affinity to other
servers). Agent clients
+ * can be created lazily at runtime when the server needs it.
+ */
private void startAgentClients() {
log.info("Starting agent clients - any persisted messages with guaranteed
delivery will be sent...");
@@ -598,7 +634,7 @@ public class StartupBean implements StartupLocal {
final long initialDelay = 1000L * 60;
final long interval = 1000L * 60;
schedulerBean.scheduleSimpleRepeatingJob(SavedSearchResultCountRecalculationJob.class,
true, false,
- initialDelay, interval);
+ initialDelay, interval);
} catch (Exception e) {
log.error("Cannot schedule asynchronous resource deletion job.",
e);
}
@@ -636,7 +672,7 @@ public class StartupBean implements StartupLocal {
final long initialDelay = 1000L * 60;
final long interval = 1000L * 60;
schedulerBean.scheduleSimpleRepeatingJob(DynaGroupAutoRecalculationJob.class,
true, false, initialDelay,
- interval);
+ interval);
} catch (Exception e) {
log.error("Cannot schedule DynaGroup auto-recalculation job.", e);
}
@@ -738,9 +774,9 @@ public class StartupBean implements StartupLocal {
log.error("Cannot create storage cluster init job", e);
}
}
-
+
try {
- String cronString = "0 30 0 ? * SUN *"; // every sunday starting
at 00:30.
+ String cronString = "0 30 0 ? * SUN *"; // every sunday starting at
00:30.
schedulerBean.scheduleSimpleCronJob(StorageClusterReadRepairJob.class, true,
true, cronString);
} catch (Exception e) {
log.error("Cannot create storage cluster read repair job", e);
@@ -754,6 +790,7 @@ public class StartupBean implements StartupLocal {
*
* @deprecated we don't have an embedded agent anymore, leaving this in case we
resurrect it
*/
+ @Deprecated
private void startEmbeddedAgent() throws RuntimeException {
// we can't use EmbeddedAgentBootstrapServiceMBean because if the embedded
agent
// isn't installed, that class will not be available; we must use JMX API