Iterators

We will continue our discussion of building a recursive linked list data structure, with a particular focus on being able to iterate over our list.

Linked List Class

So far we have the following methods implemented in our linked-list class – we can create a list, get items from specific indices (__getitem__), set items at specific indices (__setitem__), check for equality between lists (__eq__), and also get a nice string representation of the list for printing (__str__).

There is one big piece that’s missing from our list implementation – adding items to the list! We may want to add at the end of the list (appending), at the beginning of the list (prepending), or somewhere in between (insertion). Let’s now see how to implement these functions.

class LinkedList:
    """Implements our own recursive list data structure.

    Each LinkedList instance is one item of the list: it stores a single
    value (_value) and a reference to the remainder of the list (_rest),
    which is another LinkedList, or None when this item is the last one.
    A LinkedList therefore always holds at least one item.
    """
    __slots__ = ['_value', '_rest']

    def __init__(self, value=None, rest=None):
        """Create an item holding value, followed by the list rest (or None)."""
        self._value = value
        self._rest = rest

    # getters/setters
    def getRest(self):
        """Return the rest of the list after this item (None at the end)."""
        return self._rest

    def getValue(self):
        """Return the value stored in this item."""
        return self._value

    def setValue(self, val):
        """Replace the value stored in this item with val."""
        self._value = val

    def __strElements(self):
        # helper function for __str__(): the items separated by ", ",
        # without the surrounding brackets
        if self._rest is None:
            return str(self._value)
        else:
            return str(self._value) + ", " + self._rest.__strElements()

    def __str__(self):
        """Return the list formatted like a Python list, e.g. "[5, 10, 11]"."""
        return "[" + self.__strElements() + "]"

    # repr() function calls __repr__() method
    # return value should be a string that is a valid Python
    # expression that can be used to recreate the LinkedList
    def __repr__(self):
        # !r applies repr() to each part, so a string value keeps its quotes
        # and the whole result can be evaluated again
        return "LinkedList({!r}, {!r})".format(self._value, self._rest)

    # len() function calls __len__() method
    def __len__(self):
        # base case: i'm the last item
        if self._rest is None:
            return 1
        else:
            # same as return 1 + self._rest.__len__()
            return 1 + len(self._rest)

    # in operator calls __contains__() method
    def __contains__(self, val):
        if self._value == val:
            return True
        elif self._rest is None:
            return False
        else:
            # same as calling self._rest.__contains__(val)
            # return val in self._rest
            return self._rest.__contains__(val)

    # [] list index notation calls __getitem__() method
    # index specifies which item we want
    # NOTE: only indices 0 .. len(self) - 1 are supported. Any other index
    # makes the recursion run past the last item, where _rest is None, so
    # the failure is a TypeError rather than an IndexError.
    def __getitem__(self, index):
        # if index is 0, we found the item we need to return
        if index == 0:
            return self._value
        else:
            # else we recurse until index reaches 0
            # remember that this implicitly calls __getitem__
            return self._rest[index - 1]

    # [] list index notation also calls __setitem__() method
    # index specifies which item we want, val is new value
    # (an unsupported index fails the same way as in __getitem__)
    def __setitem__(self, index, val):
        # if index is 0, we found the item we need to update
        if index == 0:
            self._value = val
        else:
            # else we recurse until index reaches 0
            # remember that this implicitly calls __setitem__
            self._rest[index - 1] = val

    # == operator calls __eq__() method
    # if we want to test two LinkedLists for equality, we test
    # if all items are the same
    # other is another LinkedList
    def __eq__(self, other):
        # Anything that is not a LinkedList cannot be compared item by item;
        # returning NotImplemented lets Python fall back to its default
        # handling, so the comparison evaluates to False instead of crashing
        if not isinstance(other, LinkedList):
            return NotImplemented

        # If both lists are at their last item, only the values are left
        # to compare
        if self._rest is None and other.getRest() is None:
            return self._value == other.getValue()

        # If both lists have more items, then value of current list elements
        # must match, and same should be recursively true for
        # rest of the list
        elif self._rest is not None and other.getRest() is not None:
            return self._value == other.getValue() and self._rest == other.getRest()

        # If we reach here, then one of the lists has ended and
        # the other has not, so return false
        else:
            return False

    # append is not a special method, but it is a method
    # that we know and love from the Python list class.
    def append(self, val):
        # if i am at the last item
        if self._rest is None:
            # add a new LinkedList to the end
            self._rest = LinkedList(val)
        else:
            # else recurse until we find the end
            self._rest.append(val)

    # prepend allows us to add an element to the beginning of our list.
    # like append, it will mutate the LinkedList instance it is called on
    # LinkedLists are really fast at doing prepend operations -- you can
    # see that there's no for loop required, just a few variable re-assignments!
    def prepend(self, val):
        # move the current head into a new second item, then overwrite the
        # head in place so that existing references to this list stay valid
        oldVal = self._value
        oldRest = self._rest
        self._value = val
        self._rest = LinkedList(oldVal, oldRest)

    # inserts need a bit of iteration, but only until the index where
    # we'd like to insert the new element. once we reach that spot -- the
    # insertion operation itself is easy
    def insert(self, val, index):
        if index == 0:
            self.prepend(val)
        else:
            currList = self
            # walk to the item just before the insertion point; stop at the
            # last item so that an index past the end appends (the same
            # behavior as insertRec) instead of stepping onto None
            while index > 1 and currList._rest is not None:
                index -= 1
                currList = currList._rest
            currList._rest = LinkedList(val, currList._rest)

    # here is a recursive version of insert
    def insertRec(self, val, index):
        # if index is 0, we found the spot where the new item goes
        if index == 0:
            self.prepend(val)
        # elif we have reached the end of the list, so just append to the end
        elif self._rest is None:
            self._rest = LinkedList(val)
        # else we recurse until index reaches 0
        else:
            self._rest.insertRec(val, index - 1)
# equality: two separately built one-item lists holding the same value
myList = LinkedList("a")
myList2 = LinkedList("a")
myList == myList2
# output:
True
# append adds items at the end of the list
newList = LinkedList(5)
newList.append(10)
newList.append(11)
print(newList)
# output:
[5, 10, 11]
# prepend adds items at the front of the list
newList.prepend(1)
newList.prepend(42)
print(newList)
# output:
[42, 1, 5, 10, 11]
# insert places an item at a given index
newList = LinkedList(5)
newList.insert(13, 0) # insert at the beginning
print(newList)
newList.insert(100, len(newList)) # insert at the end
print(newList)
newList.insert(20, 2) # insert somewhere in between
print(newList)
# output of the three print calls above:
[13, 5]
[13, 5, 100]
[13, 5, 20, 100]

Iterating over our list

One last piece – how do we actually iterate over our list using a for loop? We could use a loop that looks as follows.

newList = LinkedList(5)
newList.append(10)
newList.append(42)

# index-based loop: every newList[i] goes through __getitem__
for i in range(len(newList)):
    print(newList[i])
# output:
5
10
42

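This sort of works, but what we’d really like to be able to do is iterate directly over the elements of the list as we did with “regular” lists all semester. However, when we use the usual "for item in list" notation, the loop prints every element and then ends with an error. Because our class defines __getitem__ but not __iter__, Python falls back to calling newList[0], newList[1], newList[2], ... until an IndexError is raised. Our __getitem__ never raises IndexError, so the call with index 3 runs off the end of the list and fails with a TypeError instead.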

Side Note: Besides the clunkiness of using range in the previous loop, given our LinkedList implementation, it is also very inefficient – first, a call to len() iterates over the entire list, and each indexing call newList[i] also iterates over the list up to index i each time.

newList = LinkedList(5)
newList.append(10)
newList.append(42)

# direct iteration: this class has __getitem__ but no __iter__
for item in newList:
    print(item)
# output printed before the error shown below:
5
10
42
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/1592532304.py in <module>
      3 newList.append(42)
      4 
----> 5 for item in newList:
      6     print(item)

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/3542801508.py in __getitem__(self, index)
     62             # else we recurse until index reaches 0
     63             # remember that this implicitly calls __getitem__
---> 64             return self._rest[index - 1]
     65 
     66     # [] list index notation also calls __setitem__() method

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/3542801508.py in __getitem__(self, index)
     62             # else we recurse until index reaches 0
     63             # remember that this implicitly calls __getitem__
---> 64             return self._rest[index - 1]
     65 
     66     # [] list index notation also calls __setitem__() method

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/3542801508.py in __getitem__(self, index)
     62             # else we recurse until index reaches 0
     63             # remember that this implicitly calls __getitem__
---> 64             return self._rest[index - 1]
     65 
     66     # [] list index notation also calls __setitem__() method

TypeError: 'NoneType' object is not subscriptable

Python Iterables

It turns out, if we didn’t have the __getitem__ method in our LinkedList class, we would have gotten an even more serious error stating that instances of our class are not “iterable” at all.

class LinkedList:
    """Implements our own recursive list data structure.

    Each LinkedList instance is one item of the list: it stores a single
    value (_value) and a reference to the remainder of the list (_rest),
    which is another LinkedList, or None when this item is the last one.
    A LinkedList therefore always holds at least one item.
    """
    __slots__ = ['_value', '_rest']

    def __init__(self, value=None, rest=None):
        """Create an item holding value, followed by the list rest (or None)."""
        self._value = value
        self._rest = rest

    # getters/setters
    def getRest(self):
        """Return the rest of the list after this item (None at the end)."""
        return self._rest

    def getValue(self):
        """Return the value stored in this item."""
        return self._value

    def setValue(self, val):
        """Replace the value stored in this item with val."""
        self._value = val

    def __strElements(self):
        # helper function for __str__(): the items separated by ", ",
        # without the surrounding brackets
        if self._rest is None:
            return str(self._value)
        else:
            return str(self._value) + ", " + self._rest.__strElements()

    def __str__(self):
        """Return the list formatted like a Python list, e.g. "[5, 10, 11]"."""
        return "[" + self.__strElements() + "]"

    # repr() function calls __repr__() method
    # return value should be a string that is a valid Python
    # expression that can be used to recreate the LinkedList
    def __repr__(self):
        # !r applies repr() to each part, so a string value keeps its quotes
        # and the whole result can be evaluated again
        return "LinkedList({!r}, {!r})".format(self._value, self._rest)

    # len() function calls __len__() method
    def __len__(self):
        # base case: i'm the last item
        if self._rest is None:
            return 1
        else:
            # same as return 1 + self._rest.__len__()
            return 1 + len(self._rest)

    # in operator calls __contains__() method
    def __contains__(self, val):
        if self._value == val:
            return True
        elif self._rest is None:
            return False
        else:
            # same as calling self._rest.__contains__(val)
            # return val in self._rest
            return self._rest.__contains__(val)

# __getitem__ is deliberately disabled in this version of the class, to show
# the error a for loop produces when a class has neither __getitem__ nor
# __iter__
#    # [] list index notation calls __getitem__() method
#    # index specifies which item we want
#    def __getitem__(self, index):
#        # if index is 0, we found the item we need to return
#        if index == 0:
#            return self._value
#        else:
#            # else we recurse until index reaches 0
#            # remember that this implicitly calls __getitem__
#            return self._rest[index - 1]

    # [] list index notation also calls __setitem__() method
    # index specifies which item we want, val is new value
    # NOTE: only indices 0 .. len(self) - 1 are supported. Any other index
    # makes the recursion run past the last item, where _rest is None, so
    # the failure is a TypeError rather than an IndexError.
    def __setitem__(self, index, val):
        # if index is 0, we found the item we need to update
        if index == 0:
            self._value = val
        else:
            # else we recurse until index reaches 0
            # remember that this implicitly calls __setitem__
            self._rest[index - 1] = val

    # == operator calls __eq__() method
    # if we want to test two LinkedLists for equality, we test
    # if all items are the same
    # other is another LinkedList
    def __eq__(self, other):
        # Anything that is not a LinkedList cannot be compared item by item;
        # returning NotImplemented lets Python fall back to its default
        # handling, so the comparison evaluates to False instead of crashing
        if not isinstance(other, LinkedList):
            return NotImplemented

        # If both lists are at their last item, the values of those last
        # items still have to match
        if self._rest is None and other.getRest() is None:
            return self._value == other.getValue()

        # If both lists have more items, then value of current list elements
        # must match, and same should be recursively true for
        # rest of the list
        elif self._rest is not None and other.getRest() is not None:
            return self._value == other.getValue() and self._rest == other.getRest()

        # If we reach here, then one of the lists has ended and other has not
        return False

    # append is not a special method, but it is a method
    # that we know and love from the Python list class.
    def append(self, val):
        # if i am at the last item
        if self._rest is None:
            # add a new LinkedList to the end
            self._rest = LinkedList(val)
        else:
            # else recurse until we find the end
            self._rest.append(val)

    # prepend allows us to add an element to the beginning of our list.
    # like append, it will mutate the LinkedList instance it is called on
    # LinkedLists are really fast at doing prepend operations -- you can
    # see that there's no for loop required, just a few variable re-assignments!
    def prepend(self, val):
        # move the current head into a new second item, then overwrite the
        # head in place so that existing references to this list stay valid
        oldVal = self._value
        oldRest = self._rest
        self._value = val
        self._rest = LinkedList(oldVal, oldRest)

    # inserts need a bit of iteration, but only until the index where
    # we'd like to insert the new element. once we reach that spot -- the
    # insertion operation itself is easy
    def insert(self, val, index):
        if index == 0:
            self.prepend(val)
        else:
            currList = self
            # walk to the item just before the insertion point; stop at the
            # last item so that an index past the end appends (the same
            # behavior as insertRec) instead of stepping onto None
            while index > 1 and currList._rest is not None:
                index -= 1
                currList = currList._rest
            currList._rest = LinkedList(val, currList._rest)

    # here is a recursive version of insert
    def insertRec(self, val, index):
        # if index is 0, we found the spot where the new item goes
        if index == 0:
            self.prepend(val)
        # elif we have reached the end of the list, so just append to the end
        # (without this case, inserting at index len(self) would recurse
        # onto None)
        elif self._rest is None:
            self._rest = LinkedList(val)
        # else we recurse until index reaches 0
        else:
            self._rest.insertRec(val, index - 1)
newList = LinkedList(5)
newList.append(10)
newList.append(42)

# with __getitem__ commented out and no __iter__ defined, this loop
# raises the TypeError shown below
for item in newList:
    print(item)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/1592532304.py in <module>
      3 newList.append(42)
      4 
----> 5 for item in newList:
      6     print(item)

TypeError: 'LinkedList' object is not iterable

Examples of Python iterables

Built-in sequences such as lists, tuples, and strings are iterables, meaning we can iterate over them. We can call the iter() function on an iterable to create an iterator. An iterator generates the values of the sequence on demand: each call to the next() function on the iterator returns the next value. When there are no values left, a StopIteration exception is raised.

# build a list of characters and display it
charList = list("rain")
charList
# output:
['r', 'a', 'i', 'n']
# iter() creates an iterator over the list
charIterator = iter(charList)
type(charIterator)
# output:
list_iterator
# each next() call returns the following element (outputs shown after each call)
next(charIterator)
'r'
next(charIterator)
'a'
next(charIterator)
'i'
next(charIterator)
'n'
# no values are left, so this call raises the StopIteration shown below
next(charIterator)
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/2255711290.py in <module>
----> 1 next(charIterator)

StopIteration: 

Implementing __iter__ and __next__

So, to allow for iteration in our own linked list class, all we need to do is implement the __iter__ and __next__ methods.

class LinkedList:
    """Implements our own recursive list data structure.

    Each LinkedList instance is one item of the list: it stores a single
    value (_value) and a reference to the remainder of the list (_rest),
    which is another LinkedList, or None when this item is the last one.
    A LinkedList therefore always holds at least one item.

    The extra _current attribute is the cursor used by __iter__ and
    __next__ to remember how far an iteration has progressed.
    """
    __slots__ = ['_value', '_rest', '_current']

    def __init__(self, value=None, rest=None):
        """Create an item holding value, followed by the list rest (or None)."""
        self._value = value
        self._rest = rest
        self._current = self

    # getters/setters
    def getRest(self):
        """Return the rest of the list after this item (None at the end)."""
        return self._rest

    def getValue(self):
        """Return the value stored in this item."""
        return self._value

    def setValue(self, val):
        """Replace the value stored in this item with val."""
        self._value = val

    def __strElements(self):
        # helper function for __str__(): the items separated by ", ",
        # without the surrounding brackets
        if self._rest is None:
            return str(self._value)
        else:
            return str(self._value) + ", " + self._rest.__strElements()

    def __str__(self):
        """Return the list formatted like a Python list, e.g. "[5, 10, 11]"."""
        return "[" + self.__strElements() + "]"

    # repr() function calls __repr__() method
    # return value should be a string that is a valid Python
    # expression that can be used to recreate the LinkedList
    def __repr__(self):
        # !r applies repr() to each part, so a string value keeps its quotes
        # and the whole result can be evaluated again
        return "LinkedList({!r}, {!r})".format(self._value, self._rest)

    # len() function calls __len__() method
    def __len__(self):
        # base case: i'm the last item
        if self._rest is None:
            return 1
        else:
            # same as return 1 + self._rest.__len__()
            return 1 + len(self._rest)

    # in operator calls __contains__() method
    def __contains__(self, val):
        if self._value == val:
            return True
        elif self._rest is None:
            return False
        else:
            # same as calling self._rest.__contains__(val)
            # return val in self._rest
            return self._rest.__contains__(val)

    # [] list index notation calls __getitem__() method
    # index specifies which item we want
    # NOTE: only indices 0 .. len(self) - 1 are supported. Any other index
    # makes the recursion run past the last item, where _rest is None, so
    # the failure is a TypeError rather than an IndexError.
    def __getitem__(self, index):
        # if index is 0, we found the item we need to return
        if index == 0:
            return self._value
        else:
            # else we recurse until index reaches 0
            # remember that this implicitly calls __getitem__
            return self._rest[index - 1]

    # [] list index notation also calls __setitem__() method
    # index specifies which item we want, val is new value
    # (an unsupported index fails the same way as in __getitem__)
    def __setitem__(self, index, val):
        # if index is 0, we found the item we need to update
        if index == 0:
            self._value = val
        else:
            # else we recurse until index reaches 0
            # remember that this implicitly calls __setitem__
            self._rest[index - 1] = val

    # == operator calls __eq__() method
    # if we want to test two LinkedLists for equality, we test
    # if all items are the same
    # other is another LinkedList
    def __eq__(self, other):
        # Anything that is not a LinkedList cannot be compared item by item;
        # returning NotImplemented lets Python fall back to its default
        # handling, so the comparison evaluates to False instead of crashing
        if not isinstance(other, LinkedList):
            return NotImplemented

        # If both lists are at their last item, the values of those last
        # items still have to match
        if self._rest is None and other.getRest() is None:
            return self._value == other.getValue()

        # If both lists have more items, then value of current list elements
        # must match, and same should be recursively true for
        # rest of the list
        elif self._rest is not None and other.getRest() is not None:
            return self._value == other.getValue() and self._rest == other.getRest()

        # If we reach here, then one of the lists has ended and other has not
        return False

    # append is not a special method, but it is a method
    # that we know and love from the Python list class.
    def append(self, val):
        # if i am at the last item
        if self._rest is None:
            # add a new LinkedList to the end
            self._rest = LinkedList(val)
        else:
            # else recurse until we find the end
            self._rest.append(val)

    # prepend allows us to add an element to the beginning of our list.
    # like append, it will mutate the LinkedList instance it is called on
    # LinkedLists are really fast at doing prepend operations -- you can
    # see that there's no for loop required, just a few variable re-assignments!
    def prepend(self, val):
        # move the current head into a new second item, then overwrite the
        # head in place so that existing references to this list stay valid
        oldVal = self._value
        oldRest = self._rest
        self._value = val
        self._rest = LinkedList(oldVal, oldRest)

    # inserts need a bit of iteration, but only until the index where
    # we'd like to insert the new element. once we reach that spot -- the
    # insertion operation itself is easy
    def insert(self, val, index):
        if index == 0:
            self.prepend(val)
        else:
            currList = self
            # walk to the item just before the insertion point; stop at the
            # last item so that an index past the end appends (the same
            # behavior as insertRec) instead of stepping onto None
            while index > 1 and currList._rest is not None:
                index -= 1
                currList = currList._rest
            currList._rest = LinkedList(val, currList._rest)

    # here is a recursive version of insert
    def insertRec(self, val, index):
        # if index is 0, we found the spot where the new item goes
        if index == 0:
            self.prepend(val)
        # elif we have reached the end of the list, so just append to the end
        # (without this case, inserting at index len(self) would recurse
        # onto None)
        elif self._rest is None:
            self._rest = LinkedList(val)
        # else we recurse until index reaches 0
        else:
            self._rest.insertRec(val, index - 1)

    # iter() function (and every for loop) calls __iter__() method
    # NOTE: the list acts as its own iterator and keeps its position in
    # self._current, so only one iteration over a given list can be in
    # progress at a time; starting a new one resets the position.
    def __iter__(self):
        # set current attribute to head (front of list)
        self._current = self
        return self

    # next() function calls __next__() method
    def __next__(self):
        if self._current is None:
            # we have reached the end of the list
            raise StopIteration
        else:
            # advance current to the next element in the list
            val = self._current._value
            self._current = self._current._rest
            return val
# build the list w -> o -> o -> t
testList = LinkedList("w")
testList.append("o")
testList.append("o")
testList.append("t")
print("testList: ",testList)

# for loops automatically use iterators
for char in testList:
    print(char)
# output:
testList:  [w, o, o, t]
w
o
o
t
# the same iteration done by hand: iter() once, then next() per element
listIterator = iter(testList)
print(next(listIterator))
print(next(listIterator))
print(next(listIterator))
print(next(listIterator))
# output:
w
o
o
t
# this will raise a StopIteration exception
print(next(listIterator)) 
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/1584592250.py in <module>
      1 # this will raise a StopIteration exception
----> 2 print(next(listIterator))

/var/folders/md/kwd9nc_d2ns0hw9wsvdrnt2c0000gn/T/ipykernel_85941/444043008.py in __next__(self)
    145         if self._current is None:
    146             # we have reached the end of the list
--> 147             raise StopIteration
    148         else:
    149             # advance current to the next element in the list

StopIteration: