From 13ef30ea636c7b372c45458b30996ea1f1fc6ff9 Mon Sep 17 00:00:00 2001
From: Shudong Zhou <shudongzhou@gmail.com>
Date: Mon, 12 Nov 2012 20:05:05 -0800
Subject: [PATCH] Make controller role MASTER if no HA

---
 .../core/internal/Controller.java             | 46 ++++++----------
 .../core/internal/RoleChanger.java            | 55 +++++++++++++------
 .../core/internal/ControllerTest.java         | 12 ++--
 .../core/internal/RoleChangerTest.java        | 54 +++++++++++-------
 4 files changed, 95 insertions(+), 72 deletions(-)

diff --git a/src/main/java/net/floodlightcontroller/core/internal/Controller.java b/src/main/java/net/floodlightcontroller/core/internal/Controller.java
index cd840ece7..f66d26f83 100644
--- a/src/main/java/net/floodlightcontroller/core/internal/Controller.java
+++ b/src/main/java/net/floodlightcontroller/core/internal/Controller.java
@@ -722,32 +722,19 @@ public class Controller implements IFloodlightProviderService,
                     // *and* send the current role to the new switch.
                     connectedSwitches.add(sw);
                     
-                    if (role != null) {
-                        // Send a role request if role support is enabled for the controller
-                        // This is a probe that we'll use to determine if the switch
-                        // actually supports the role request message. If it does we'll
-                        // get back a role reply message. If it doesn't we'll get back an
-                        // OFError message. 
-                        // If role is MASTER we will promote switch to active
-                        // list when we receive the switch's role reply messages
-                        log.debug("This controller's role is {}, " + 
-                                "sending initial role request msg to {}",
-                                role, sw);
-                        Collection<IOFSwitch> swList = new ArrayList<IOFSwitch>(1);
-                        swList.add(sw);
-                        roleChanger.submitRequest(swList, role);
-                    }
-                    else {
-                        // Role supported not enabled on controller (for now)
-                        // automatically promote switch to active state. 
-                        log.debug("This controller's role is null, " + 
-                                "not sending role request msg to {}",
-                                role, sw);
-                        // Need to clear FlowMods before we add the switch
-                        // and dispatch updates otherwise we have a race condition.
-                        addSwitch(sw, true);
-                        state.firstRoleReplyReceived = true;
-                    }
+                    // Send a role request.
+                    // This is a probe that we'll use to determine if the switch
+                    // actually supports the role request message. If it does we'll
+                    // get back a role reply message. If it doesn't we'll get back an
+                    // OFError message. 
+                    // If role is MASTER we will promote switch to active
+                    // list when we receive the switch's role reply messages
+                    log.debug("This controller's role is {}, " + 
+                            "sending initial role request msg to {}",
+                            role, sw);
+                    Collection<IOFSwitch> swList = new ArrayList<IOFSwitch>(1);
+                    swList.add(sw);
+                    roleChanger.submitRequest(swList, role);
                 }
             }
         }
@@ -783,8 +770,6 @@ public class Controller implements IFloodlightProviderService,
             sw.setFloodlightProvider(Controller.this);
             sw.setThreadPoolService(threadPool);
             sw.setFeaturesReply(state.featuresReply);
-            sw.setAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA,
-                        state.description);
             sw.setSwitchProperties(state.description);
             readPropertyFromStorage();
 
@@ -1058,6 +1043,7 @@ public class Controller implements IFloodlightProviderService,
                     // request/reply style messages.
                     OFError error = (OFError) m;
                     boolean shouldLogError = true;
+
                     // TODO: should we check that firstRoleReplyReceived is false,
                     // i.e., check only whether the first request fails?
                     if (roleChanger.checkFirstPendingRoleRequestXid(
@@ -1075,7 +1061,7 @@ public class Controller implements IFloodlightProviderService,
                         // is not a spurious error
                         shouldLogError = !isBadVendorError;
                         if (isBadVendorError) {
-                            if (state.firstRoleReplyReceived && (role != null)) {
+                            if (state.firstRoleReplyReceived) {
                                 log.warn("Received ERROR from sw {} that "
                                           +"indicates roles are not supported "
                                           +"but we have received a valid "
@@ -1749,7 +1735,7 @@ public class Controller implements IFloodlightProviderService,
                 recommendation=LogMessageDoc.CHECK_CONTROLLER)
     })
     protected Role getInitialRole(Map<String, String> configParams) {
-        Role role = null;
+        Role role = Role.MASTER;
         String roleString = configParams.get("role");
         if (roleString == null) {
             String rolePath = configParams.get("rolepath");
diff --git a/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java b/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java
index 0028ddd3c..0526c8ecb 100644
--- a/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java
+++ b/src/main/java/net/floodlightcontroller/core/internal/RoleChanger.java
@@ -14,6 +14,7 @@ import java.util.concurrent.TimeUnit;
 import org.openflow.protocol.OFMessage;
 import org.openflow.protocol.OFType;
 import org.openflow.protocol.OFVendor;
+import org.openflow.protocol.statistics.OFDescriptionStatistics;
 import org.openflow.vendor.nicira.OFNiciraVendorData;
 import org.openflow.vendor.nicira.OFRoleRequestVendorData;
 import org.openflow.vendor.nicira.OFRoleVendorData;
@@ -21,7 +22,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import net.floodlightcontroller.core.FloodlightContext;
-import net.floodlightcontroller.core.IFloodlightProviderService;
 import net.floodlightcontroller.core.IFloodlightProviderService.Role;
 import net.floodlightcontroller.core.IOFSwitch;
 import net.floodlightcontroller.core.annotations.LogMessageDoc;
@@ -133,9 +133,9 @@ public class RoleChanger {
     protected long timeout;
     protected ConcurrentHashMap<IOFSwitch, LinkedList<PendingRoleRequestEntry>>
                 pendingRequestMap;
-    private IFloodlightProviderService floodlightProvider;
+    private Controller controller;
     
-    protected static long DEFAULT_TIMEOUT = 15L*1000*1000*1000L; // 15s
+    protected static long DEFAULT_TIMEOUT = 5L*1000*1000*1000L; // 5s
     protected static Logger log = LoggerFactory.getLogger(RoleChanger.class);
     private static final String HA_CHECK_SWITCH = 
             "Check the health of the indicated switch.  If the problem " +
@@ -207,17 +207,25 @@ public class RoleChanger {
             try {
                 while (true) {
                     try {
-                        t = pendingTasks.take();
+                        t = pendingTasks.poll();
+                        if (t == null) {
+                            // Notify when there is no immediate tasks to run
+                            // For the convenience of RoleChanger unit tests
+                            synchronized (pendingTasks) {
+                                pendingTasks.notifyAll();
+                            }
+                            t = pendingTasks.take();
+                        }
                     } catch (InterruptedException e) {
                         // see http://www.ibm.com/developerworks/java/library/j-jtp05236/index.html
                         interrupted = true;
                         continue;
                     }
                     if (t.type == RoleChangeTask.Type.REQUEST) {
+                        t.deadline += timeout;
                         sendRoleRequest(t.switches, t.role, t.deadline);
                         // Queue the timeout
                         t.type = RoleChangeTask.Type.TIMEOUT;
-                        t.deadline += timeout;
                         pendingTasks.put(t);
                     }
                     else {
@@ -244,8 +252,8 @@ public class RoleChanger {
         
     }
 
-    public RoleChanger(IFloodlightProviderService floodlightProvider) {
-        this.floodlightProvider = floodlightProvider;
+    public RoleChanger(Controller controller) {
+        this.controller = controller;
         this.pendingRequestMap = new ConcurrentHashMap<IOFSwitch,
                 LinkedList<PendingRoleRequestEntry>>();
         this.pendingTasks = new DelayQueue<RoleChangeTask>();
@@ -334,10 +342,18 @@ public class RoleChanger {
     protected void verifyRoleReplyReceived(Collection<IOFSwitch> switches,
                                    long cookie) {
         for (IOFSwitch sw: switches) {
-            if (checkFirstPendingRoleRequestCookie(sw, cookie)) {
-                sw.setHARole(null,  false);
-                log.warn("Timeout while waiting for role reply from switch {}."
-                         + " Disconnecting", sw);
+            PendingRoleRequestEntry entry =
+                    checkFirstPendingRoleRequestCookie(sw, cookie);
+            if (entry != null){
+                log.warn("Timeout while waiting for role reply from switch {}"
+                         + " with datapath {}", sw,
+                         sw.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA));
+                sw.setHARole(entry.role, false);
+                if (entry.role == Role.SLAVE) {
+                    sw.disconnectOutputStream();
+                } else {
+                    controller.addSwitch(sw, true);
+                }
             }
         }
     }
@@ -436,24 +452,27 @@ public class RoleChanger {
     
     /**
      * Checks whether the given request cookie matches the cookie of the first 
-     * pending request
+     * pending request. If so, return the entry
      * @param cookie
      * @return
      */
-    public boolean checkFirstPendingRoleRequestCookie(IOFSwitch sw, long cookie)
+    protected PendingRoleRequestEntry checkFirstPendingRoleRequestCookie(
+            IOFSwitch sw, long cookie)
     {
         LinkedList<PendingRoleRequestEntry> pendingRoleRequests =
                 pendingRequestMap.get(sw);
         if (pendingRoleRequests == null) {
-            return false;
+            return null;
         }
         synchronized(pendingRoleRequests) {
             PendingRoleRequestEntry head = pendingRoleRequests.peek();
             if (head == null)
-                return false;
-            else 
-                return head.cookie == cookie;
+                return null;
+            if (head.cookie == cookie) {
+                return pendingRoleRequests.poll();
+            }
         }
+        return null;
     }
     
     /**
@@ -539,7 +558,7 @@ public class RoleChanger {
         }
 
         // Construct the role request message
-        OFVendor roleRequest = (OFVendor)floodlightProvider.
+        OFVendor roleRequest = (OFVendor)controller.
                 getOFMessageFactory().getMessage(OFType.VENDOR);
         roleRequest.setXid(xid);
         roleRequest.setVendor(OFNiciraVendorData.NX_VENDOR_ID);
diff --git a/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java b/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java
index e37b752cb..a719bf6bd 100644
--- a/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java
+++ b/src/test/java/net/floodlightcontroller/core/internal/ControllerTest.java
@@ -762,10 +762,11 @@ public class ControllerTest extends FloodlightTestCase
         assertTrue("Check that update is HARoleUpdate", 
                    upd instanceof Controller.HARoleUpdate);
         Controller.HARoleUpdate roleUpd = (Controller.HARoleUpdate)upd;
-        assertSame(null, roleUpd.oldRole);
+        assertSame(Role.MASTER, roleUpd.oldRole);
         assertSame(Role.SLAVE, roleUpd.newRole);
     }
     
+    @SuppressWarnings("unchecked")
     @Test
     public void testCheckSwitchReady() {
         OFChannelState state = new OFChannelState();
@@ -825,13 +826,14 @@ public class ControllerTest extends FloodlightTestCase
         // setupSwitchForAddSwitch(chdlr.sw, 0L);
         // chdlr.sw.clearAllFlowMods();
         desc.setManufacturerDescription("test vendor");
+        controller.roleChanger.submitRequest(
+                (List<IOFSwitch>)EasyMock.anyObject(),
+                (Role)EasyMock.anyObject());
         replay(controller.roleChanger);
         chdlr.checkSwitchReady();
         verify(controller.roleChanger);
         assertSame(OFChannelState.HandshakeState.READY, state.hsState);
-        assertSame(chdlr.sw, controller.activeSwitches.get(0L));
-        assertTrue(controller.connectedSwitches.contains(chdlr.sw));
-        assertTrue(state.firstRoleReplyReceived);
+        assertFalse(state.firstRoleReplyReceived);
         reset(controller.roleChanger);
         controller.connectedSwitches.clear();
         controller.activeSwitches.clear();
@@ -850,7 +852,7 @@ public class ControllerTest extends FloodlightTestCase
         assertSame(OFChannelState.HandshakeState.READY, state.hsState);
         assertTrue(controller.activeSwitches.isEmpty());
         assertFalse(controller.connectedSwitches.isEmpty());
-        assertTrue(state.firstRoleReplyReceived);
+        assertFalse(state.firstRoleReplyReceived);
         Collection<IOFSwitch> swList = swListCapture.getValue();
         assertEquals(1, swList.size());
     }
diff --git a/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java b/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java
index 9c49aad3d..06795e4dd 100644
--- a/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java
+++ b/src/test/java/net/floodlightcontroller/core/internal/RoleChangerTest.java
@@ -17,7 +17,6 @@ import java.util.LinkedList;
 import java.util.List;
 
 import net.floodlightcontroller.core.FloodlightContext;
-import net.floodlightcontroller.core.IFloodlightProviderService;
 import net.floodlightcontroller.core.IOFSwitch;
 import net.floodlightcontroller.core.IFloodlightProviderService.Role;
 import net.floodlightcontroller.core.internal.RoleChanger.PendingRoleRequestEntry;
@@ -39,11 +38,11 @@ import org.openflow.vendor.nicira.OFRoleVendorData;
 
 public class RoleChangerTest {
     public RoleChanger roleChanger;
-    IFloodlightProviderService controller;
+    Controller controller;
     
     @Before
     public void setUp() throws Exception {
-        controller = createMock(IFloodlightProviderService.class);
+        controller = createMock(Controller.class);
         roleChanger = new RoleChanger(controller);
         BasicFactory factory = new BasicFactory();
         expect(controller.getOFMessageFactory()).andReturn(factory).anyTimes();
@@ -179,8 +178,10 @@ public class RoleChangerTest {
         PendingRoleRequestEntry entry =
                 new PendingRoleRequestEntry(1, Role.MASTER, 123456);
         pendingList2.add(entry);
-        // Timed out switch should disconnect
-        sw2.setHARole(null, false);
+        // Timed out switch should become active
+        expect(sw2.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA))
+                .andReturn(null);
+        sw2.setHARole(Role.MASTER, false);
         EasyMock.expectLastCall();
         switches.add(sw2);
         
@@ -219,7 +220,7 @@ public class RoleChangerTest {
     @Test
     public void testSubmitRequest() throws Exception {
         LinkedList<IOFSwitch> switches = new LinkedList<IOFSwitch>();
-        roleChanger.timeout = 500*1000*1000; // 500 ms
+        roleChanger.timeout = 100*1000*1000; // 100 ms
         
         // a switch that supports role requests
         OFSwitchImpl sw1 = EasyMock.createStrictMock(OFSwitchImpl.class);
@@ -235,26 +236,41 @@ public class RoleChangerTest {
         expect(sw1.getNextTransactionId()).andReturn(2);
         sw1.write((List<OFMessage>)EasyMock.anyObject(),
                 (FloodlightContext)EasyMock.anyObject());
+        expect(sw1.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA))
+                .andReturn(null);
+        sw1.setHARole(Role.MASTER, false);
+        expect(sw1.getAttribute(IOFSwitch.SWITCH_DESCRIPTION_DATA))
+                .andReturn(null);
+        sw1.setHARole(Role.SLAVE, false);
+        // Disconnect on timing out SLAVE request
+        sw1.disconnectOutputStream();
         switches.add(sw1);
         
+        // Add to switch when timing out the MASTER request
+        controller.addSwitch(sw1, true);
+        
+
         replay(sw1, controller);
         roleChanger.submitRequest(switches, Role.MASTER);
         roleChanger.submitRequest(switches, Role.SLAVE);
-        // Wait until role request has been sent. 
-        // TODO: need to get rid of this sleep somehow
-        Thread.sleep(100);
-        // Now there should be exactly one timeout task pending
+        // Wait until role request has been sent.
+        synchronized (roleChanger.pendingTasks) { 
+            while (RoleChanger.RoleChangeTask.Type.TIMEOUT !=
+                     roleChanger.pendingTasks.peek().type) {
+                roleChanger.pendingTasks.wait();
+            }
+        }
+        // Now there should be exactly one timeout task pending for each request
         assertEquals(2, roleChanger.pendingTasks.size());
-        // Make sure it's indeed a timeout task
-        assertSame(RoleChanger.RoleChangeTask.Type.TIMEOUT, 
-                     roleChanger.pendingTasks.peek().type);
         // Check that RoleChanger indeed made a copy of switches collection
         assertNotSame(switches, roleChanger.pendingTasks.peek().switches);
         
         // Wait until the timeout triggers 
-        // TODO: get rid of this sleep too.
-        Thread.sleep(500);
-        assertEquals(0, roleChanger.pendingTasks.size());
+        synchronized (roleChanger.pendingTasks) { 
+            while (roleChanger.pendingTasks.size() != 0) {
+                roleChanger.pendingTasks.wait();
+            }
+        }
         verify(sw1, controller);
         
     }
@@ -375,12 +391,12 @@ public class RoleChangerTest {
         Role role = Role.MASTER;
         OFSwitchImpl sw = new OFSwitchImpl();
         setupPendingRoleRequest(sw, xid, role, cookie);
-        assertEquals(true,
+        assertNotSame(null,
                 roleChanger.checkFirstPendingRoleRequestCookie(sw, cookie));
-        assertEquals(false,
+        assertEquals(null,
                 roleChanger.checkFirstPendingRoleRequestCookie(sw, 0));
         roleChanger.pendingRequestMap.get(sw).clear();
-        assertEquals(false,
+        assertEquals(null,
                 roleChanger.checkFirstPendingRoleRequestCookie(sw, cookie));
     }
     
-- 
GitLab