From 13ef30ea636c7b372c45458b30996ea1f1fc6ff9 Mon Sep 17 00:00:00 2001 From: Shudong Zhou <shudongzhou@gmail.com> Date: Mon, 12 Nov 2012 20:05:05 -0800 Subject: [PATCH] Make controller role MASTER if no HA --- .../core/internal/Controller.java | 46 ++++++---------- .../core/internal/RoleChanger.java | 55 +++++++++++++------ .../core/internal/ControllerTest.java | 12 ++-- .../core/internal/RoleChangerTest.java | 54 +++++++++++------- 4 files changed, 95 insertions(+), 72 deletions(-) diff --git a/src/main/java/net/floodlightcontroller/core/internal/Controller.java b/src/main/java/net/floodlightcontroller/core/internal/Controller.java index cd840ece7..f66d26f83 100644 --- a/src/main/java/net/floodlightcontroller/core/internal/Controller.java +++ b/src/main/java/net/floodlightcontroller/core/internal/Controller.java @@ -722,32 +722,19 @@ public class Controller implements IFloodlightProviderService, // *and* send the current role to the new switch. connectedSwitches.add(sw); - if (role != null) { - // Send a role request if role support is enabled for the controller - // This is a probe that we'll use to determine if the switch - // actually supports the role request message. If it does we'll - // get back a role reply message. If it doesn't we'll get back an - // OFError message. - // If role is MASTER we will promote switch to active - // list when we receive the switch's role reply messages - log.debug("This controller's role is {}, " + - "sending initial role request msg to {}", - role, sw); - Collection<IOFSwitch> swList = new ArrayList<IOFSwitch>(1); - swList.add(sw); - roleChanger.submitRequest(swList, role); - } - else { - // Role supported not enabled on controller (for now) - // automatically promote switch to active state. - log.debug("This controller's role is null, " + - "not sending role request msg to {}", - role, sw); - // Need to clear FlowMods before we add the switch - // and dispatch updates otherwise we have a race condition. - addSwitch(sw, true); - state.firstRoleReplyReceived = true; - } + // Send a role request. + // This is a probe that we'll use to determine if the switch + // actually supports the role request message. If it does we'll + // get back a role reply message. If it doesn't we'll get back an + // OFError message. + // If role is MASTER we will promote switch to active + // list when we receive the switch's role reply messages + log.debug("This controller's role is {}, " + + "sending initial role request msg to {}", + role, sw); + Collection<IOFSwitch> swList = new ArrayList<IOFSwitch>(1); + swList.add(sw); + roleChanger.submitRequest(swList, role); } } } @@ -783,8 +770,6 @@ public class Controller implements IFloodlightProviderService, sw.setFloodlightProvider(Controller.this); sw.setThreadPoolService(threadPool); sw.setFeaturesReply(state.featuresReply); - sw.setAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA, - state.description); sw.setSwitchProperties(state.description); readPropertyFromStorage(); @@ -1058,6 +1043,7 @@ public class Controller implements IFloodlightProviderService, // request/reply style messages. OFError error = (OFError) m; boolean shouldLogError = true; + // TODO: should we check that firstRoleReplyReceived is false, // i.e., check only whether the first request fails? if (roleChanger.checkFirstPendingRoleRequestXid( @@ -1075,7 +1061,7 @@ public class Controller implements IFloodlightProviderService, // is not a spurious error shouldLogError = !isBadVendorError; if (isBadVendorError) { - if (state.firstRoleReplyReceived && (role != null)) { + if (state.firstRoleReplyReceived) { log.warn("Received ERROR from sw {} that " +"indicates roles are not supported " +"but we have received a valid " @@ -1749,7 +1735,7 @@ public class Controller implements IFloodlightProviderService, recommendation=LogMessageDoc.CHECK_CONTROLLER) }) protected Role getInitialRole(Map<String, String> configParams) { - Role role = null; + Role role = Role.MASTER; String roleString = configParams.get("role"); if (roleString == null) { String rolePath = configParams.get("rolepath"); diff --git a/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java b/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java index 0028ddd3c..0526c8ecb 100644 --- a/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java +++ b/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java @@ -14,6 +14,7 @@ import java.util.concurrent.TimeUnit; import org.openflow.protocol.OFMessage; import org.openflow.protocol.OFType; import org.openflow.protocol.OFVendor; +import org.openflow.protocol.statistics.OFDescriptionStatistics; import org.openflow.vendor.nicira.OFNiciraVendorData; import org.openflow.vendor.nicira.OFRoleRequestVendorData; import org.openflow.vendor.nicira.OFRoleVendorData; @@ -21,7 +22,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import net.floodlightcontroller.core.FloodlightContext; -import net.floodlightcontroller.core.IFloodlightProviderService; import net.floodlightcontroller.core.IFloodlightProviderService.Role; import net.floodlightcontroller.core.IOFSwitch; import net.floodlightcontroller.core.annotations.LogMessageDoc; @@ -133,9 +133,9 @@ public class RoleChanger { protected long timeout; protected ConcurrentHashMap<IOFSwitch, LinkedList<PendingRoleRequestEntry>> pendingRequestMap; - private IFloodlightProviderService floodlightProvider; + private Controller controller; - protected static long DEFAULT_TIMEOUT = 15L*1000*1000*1000L; // 15s + protected static long DEFAULT_TIMEOUT = 5L*1000*1000*1000L; // 5s protected static Logger log = LoggerFactory.getLogger(RoleChanger.class); private static final String HA_CHECK_SWITCH = "Check the health of the indicated switch. If the problem " + @@ -207,17 +207,25 @@ public class RoleChanger { try { while (true) { try { - t = pendingTasks.take(); + t = pendingTasks.poll(); + if (t == null) { + // Notify when there is no immediate tasks to run + // For the convenience of RoleChanger unit tests + synchronized (pendingTasks) { + pendingTasks.notifyAll(); + } + t = pendingTasks.take(); + } } catch (InterruptedException e) { // see http://www.ibm.com/developerworks/java/library/j-jtp05236/index.html interrupted = true; continue; } if (t.type == RoleChangeTask.Type.REQUEST) { + t.deadline += timeout; sendRoleRequest(t.switches, t.role, t.deadline); // Queue the timeout t.type = RoleChangeTask.Type.TIMEOUT; - t.deadline += timeout; pendingTasks.put(t); } else { @@ -244,8 +252,8 @@ public class RoleChanger { } - public RoleChanger(IFloodlightProviderService floodlightProvider) { - this.floodlightProvider = floodlightProvider; + public RoleChanger(Controller controller) { + this.controller = controller; this.pendingRequestMap = new ConcurrentHashMap<IOFSwitch, LinkedList<PendingRoleRequestEntry>>(); this.pendingTasks = new DelayQueue<RoleChangeTask>(); @@ -334,10 +342,18 @@ public class RoleChanger { protected void verifyRoleReplyReceived(Collection<IOFSwitch> switches, long cookie) { for (IOFSwitch sw: switches) { - if (checkFirstPendingRoleRequestCookie(sw, cookie)) { - sw.setHARole(null, false); - log.warn("Timeout while waiting for role reply from switch {}." - + " Disconnecting", sw); + PendingRoleRequestEntry entry = + checkFirstPendingRoleRequestCookie(sw, cookie); + if (entry != null){ + log.warn("Timeout while waiting for role reply from switch {}" + + " with datapath {}", sw, + sw.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA)); + sw.setHARole(entry.role, false); + if (entry.role == Role.SLAVE) { + sw.disconnectOutputStream(); + } else { + controller.addSwitch(sw, true); + } } } } @@ -436,24 +452,27 @@ public class RoleChanger { /** * Checks whether the given request cookie matches the cookie of the first - * pending request + * pending request. If so, return the entry * @param cookie * @return */ - public boolean checkFirstPendingRoleRequestCookie(IOFSwitch sw, long cookie) + protected PendingRoleRequestEntry checkFirstPendingRoleRequestCookie( + IOFSwitch sw, long cookie) { LinkedList<PendingRoleRequestEntry> pendingRoleRequests = pendingRequestMap.get(sw); if (pendingRoleRequests == null) { - return false; + return null; } synchronized(pendingRoleRequests) { PendingRoleRequestEntry head = pendingRoleRequests.peek(); if (head == null) - return false; - else - return head.cookie == cookie; + return null; + if (head.cookie == cookie) { + return pendingRoleRequests.poll(); + } } + return null; } /** @@ -539,7 +558,7 @@ public class RoleChanger { } // Construct the role request message - OFVendor roleRequest = (OFVendor)floodlightProvider. + OFVendor roleRequest = (OFVendor)controller. getOFMessageFactory().getMessage(OFType.VENDOR); roleRequest.setXid(xid); roleRequest.setVendor(OFNiciraVendorData.NX_VENDOR_ID); diff --git a/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java b/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java index e37b752cb..a719bf6bd 100644 --- a/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java +++ b/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java @@ -762,10 +762,11 @@ public class ControllerTest extends FloodlightTestCase assertTrue("Check that update is HARoleUpdate", upd instanceof Controller.HARoleUpdate); Controller.HARoleUpdate roleUpd = (Controller.HARoleUpdate)upd; - assertSame(null, roleUpd.oldRole); + assertSame(Role.MASTER, roleUpd.oldRole); assertSame(Role.SLAVE, roleUpd.newRole); } + @SuppressWarnings("unchecked") @Test public void testCheckSwitchReady() { OFChannelState state = new OFChannelState(); @@ -825,13 +826,14 @@ public class ControllerTest extends FloodlightTestCase // setupSwitchForAddSwitch(chdlr.sw, 0L); // chdlr.sw.clearAllFlowMods(); desc.setManufacturerDescription("test vendor"); + controller.roleChanger.submitRequest( + (List<IOFSwitch>)EasyMock.anyObject(), + (Role)EasyMock.anyObject()); replay(controller.roleChanger); chdlr.checkSwitchReady(); verify(controller.roleChanger); assertSame(OFChannelState.HandshakeState.READY, state.hsState); - assertSame(chdlr.sw, controller.activeSwitches.get(0L)); - assertTrue(controller.connectedSwitches.contains(chdlr.sw)); - assertTrue(state.firstRoleReplyReceived); + assertFalse(state.firstRoleReplyReceived); reset(controller.roleChanger); controller.connectedSwitches.clear(); controller.activeSwitches.clear(); @@ -850,7 +852,7 @@ public class ControllerTest extends FloodlightTestCase assertSame(OFChannelState.HandshakeState.READY, state.hsState); assertTrue(controller.activeSwitches.isEmpty()); assertFalse(controller.connectedSwitches.isEmpty()); - assertTrue(state.firstRoleReplyReceived); + assertFalse(state.firstRoleReplyReceived); Collection<IOFSwitch> swList = swListCapture.getValue(); assertEquals(1, swList.size()); } diff --git a/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java b/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java index 9c49aad3d..06795e4dd 100644 --- a/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java +++ b/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java @@ -17,7 +17,6 @@ import java.util.LinkedList; import java.util.List; import net.floodlightcontroller.core.FloodlightContext; -import net.floodlightcontroller.core.IFloodlightProviderService; import net.floodlightcontroller.core.IOFSwitch; import net.floodlightcontroller.core.IFloodlightProviderService.Role; import net.floodlightcontroller.core.internal.RoleChanger.PendingRoleRequestEntry; @@ -39,11 +38,11 @@ import org.openflow.vendor.nicira.OFRoleVendorData; public class RoleChangerTest { public RoleChanger roleChanger; - IFloodlightProviderService controller; + Controller controller; @Before public void setUp() throws Exception { - controller = createMock(IFloodlightProviderService.class); + controller = createMock(Controller.class); roleChanger = new RoleChanger(controller); BasicFactory factory = new BasicFactory(); expect(controller.getOFMessageFactory()).andReturn(factory).anyTimes(); @@ -179,8 +178,10 @@ public class RoleChangerTest { PendingRoleRequestEntry entry = new PendingRoleRequestEntry(1, Role.MASTER, 123456); pendingList2.add(entry); - // Timed out switch should disconnect - sw2.setHARole(null, false); + // Timed out switch should become active + expect(sw2.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA)) + .andReturn(null); + sw2.setHARole(Role.MASTER, false); EasyMock.expectLastCall(); switches.add(sw2); @@ -219,7 +220,7 @@ public class RoleChangerTest { @Test public void testSubmitRequest() throws Exception { LinkedList<IOFSwitch> switches = new LinkedList<IOFSwitch>(); - roleChanger.timeout = 500*1000*1000; // 500 ms + roleChanger.timeout = 100*1000*1000; // 100 ms // a switch that supports role requests OFSwitchImpl sw1 = EasyMock.createStrictMock(OFSwitchImpl.class); @@ -235,26 +236,41 @@ public class RoleChangerTest { expect(sw1.getNextTransactionId()).andReturn(2); sw1.write((List<OFMessage>)EasyMock.anyObject(), (FloodlightContext)EasyMock.anyObject()); + expect(sw1.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA)) + .andReturn(null); + sw1.setHARole(Role.MASTER, false); + expect(sw1.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA)) + .andReturn(null); + sw1.setHARole(Role.SLAVE, false); + // Disconnect on timing out SLAVE request + sw1.disconnectOutputStream(); switches.add(sw1); + // Add to switch when timing out the MASTER request + controller.addSwitch(sw1, true); + + replay(sw1, controller); roleChanger.submitRequest(switches, Role.MASTER); roleChanger.submitRequest(switches, Role.SLAVE); - // Wait until role request has been sent. - // TODO: need to get rid of this sleep somehow - Thread.sleep(100); - // Now there should be exactly one timeout task pending + // Wait until role request has been sent. + synchronized (roleChanger.pendingTasks) { + while (RoleChanger.RoleChangeTask.Type.TIMEOUT != + roleChanger.pendingTasks.peek().type) { + roleChanger.pendingTasks.wait(); + } + } + // Now there should be exactly one timeout task pending for each request assertEquals(2, roleChanger.pendingTasks.size()); - // Make sure it's indeed a timeout task - assertSame(RoleChanger.RoleChangeTask.Type.TIMEOUT, - roleChanger.pendingTasks.peek().type); // Check that RoleChanger indeed made a copy of switches collection assertNotSame(switches, roleChanger.pendingTasks.peek().switches); // Wait until the timeout triggers - // TODO: get rid of this sleep too. - Thread.sleep(500); - assertEquals(0, roleChanger.pendingTasks.size()); + synchronized (roleChanger.pendingTasks) { + while (roleChanger.pendingTasks.size() != 0) { + roleChanger.pendingTasks.wait(); + } + } verify(sw1, controller); } @@ -375,12 +391,12 @@ public class RoleChangerTest { Role role = Role.MASTER; OFSwitchImpl sw = new OFSwitchImpl(); setupPendingRoleRequest(sw, xid, role, cookie); - assertEquals(true, + assertNotSame(null, roleChanger.checkFirstPendingRoleRequestCookie(sw, cookie)); - assertEquals(false, + assertEquals(null, roleChanger.checkFirstPendingRoleRequestCookie(sw, 0)); roleChanger.pendingRequestMap.get(sw).clear(); - assertEquals(false, + assertEquals(null, roleChanger.checkFirstPendingRoleRequestCookie(sw, cookie)); } -- GitLab