ó
LáOc           @   s¤   d  Z  d d l Z d d l Z d d l Z d d l j Z d d d d d  Z	 d   Z
 d d d  Z d e f d     YZ d	   Z d
   Z d   Z d   Z d S(   so  
prpy module lindisc.py
Jason Corso (jcorso@acm.org)

This module has been programmed to support teaching an introduction to
 pattern recognition course.

Contains implementations of linear discriminants.

This file is organized alphabetically by function name.

Some pointers on convention:
    1.  Generally, data is stored in an N x D matrix, where N is the number of samples and D is the dimension.
    2.  Class information is typically stored in a parallel N x 1 vector.
    3.  All plotting functions assume the data is 2 dimensions.

Please report bugs/fixes/enhancements to jcorso@acm.org when you find/make them.
i˙˙˙˙Ni    c   	      C   sĂ  | d k r t j   } n t j | j  t j   t j d d d d g  t j t  t j   | d k	 r
| | j	 d  | d k } xy t
 | j d  D]a } | | s˘ t j |  | d f |  | d f d d t d d d	 d d d g d
 d d d q˘ q˘ Wn  x˘ t
 | j d  D] } | | d k rqt j |  | d f |  | d f t j d d d d t qt j |  | d f |  | d f t j d d d d t qWt | | |  | S(   s2   Debug two-class plotting and draw the linear discriminant. 
    
        (X,Y) are the data set
        Z is the normalized data set

        a is the weight vector, b (if exists) is the bias (a'x + b is the classifier)
        m is the margin (if exists)

        h is the figure handle to draw into
    iö˙˙˙i
   i   i    t   got   holdt
   markersizeg      ,@t   markerfacecolort   markeredgecolort   gt   markeredgewidthg      ř?i˙˙˙˙g       @N(   t   Nonet   pltt   figuret   numbert   clft   axist   gridt   Truet   showt   sumt   ranget   shapet   plott	   datatoolst   kDrawt   plotWVector(	   t   Xt   Yt   at   bt   ht   Zt   mt   Bt   i(    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt	   debugPlot   s&    


2&=>c         C   s&   |  j    } | | d k c d 9<| S(   sN    Data normalization procedure: multiple data values by -1 if the class is -1. i˙˙˙˙(   t   copy(   R   R   t   Xhat(    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt	   normalizeG   s    c         C   s  t  j d  } | d k	 r1 | |  d | d <n  |  t  j j |   } | d | } | d | } t j | d | d g | d | d g d d d t j | d | d g | d | d g d	 d d t  j | d | d g  } | d | } | d | }	 t j | d |	 d g | d |	 d g d
 d d | d k	 rt  j |  r| | | }
 |
 d | } |
 d | } t j | d | d g | d | d g d d d | | | }
 |
 d | } |
 d | } t j | d | d g | d | d g d d d n  d S(   s    In the current axes, plot the weight vector. 
     
        a is the weight vector, b is the bias term
        m is the margin
    i   i   i2   iÎ˙˙˙i    R   t	   linewidthg      @t   rt   ks   k-g      đ?N(	   t   npt   zerosR   t   linalgt   normR   R   t   asarrayt   isscalar(   R   R   R   t   Ot   ant   Vt   Ut   anrt   AR   t   Obt   Abt   Bb(    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyR   N   s*    5555t   LinDiscc           B   s&   e  Z d  Z d d d  Z d   Z RS(   sV   
    A Linear Discriminant Classifier

    Bias included but can be disregarded.

    c         C   s.   | |  _  | d  k r! d |  _ n	 | |  _ d  S(   Ni    (   t   wR   R   (   t   selfR7   R   (    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt   __init__~   s    	c         C   s    t  j |  j j |  |  j  S(   s.    Classify the sample using the weight vector. (   R'   t   signR7   t   dotR   (   R8   t   sample(    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt   classify   s    N(   t   __name__t
   __module__t   __doc__R   R9   R=   (    (    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyR6   v   s   c   
      C   s.  t  |  |  } t |  } d } | j   } | t j j |  } t |  | | d | } xĚ | d k  r)d | | d | d f G| | j d  d k }	 d t |	  | f GHt |  | | d | d | t   t |	  d k rë Pn  | | | |	 j d  } | t j j |  } | d 7} q^ W| S(   sŐ   
    Run the batch perceptron algorithm on (X,Y) to learn a linear discriminant. 
    
      Batch Perceptron on D (not normalized)
        for dimension d and n samples
      X is n by d 
      Y is n by 1 and is -1 or +1 for classes
      Assume (linear) separability.

        sum_{y in incorrect} -a'y

      for all x in D (column vector)

       a'x > 0

      no margin

      Basic assumption we have 2D points in a plane, for debugging and visualization.
    i    R   i
   s   iteration %d   %.4f  %.4f	i   s      %d of %d samples correctR   (	   R#   t   lenR!   R'   R)   R*   R    R   t	   raw_input(
   R   R   t   a0t   etaR   t   nt   tR   R   R   (    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt   batchPerceptron   s$    c         C   s   t  |  |  } t |  } t j |  r@ t j |  | } n  t j j |  j |  } | t j j |  } t	 |  | | d | d | } | S(   sí    
    Run a least-squares estimation of the discriminant via the pseudo-inverse.

      X is n by d 
      Y is n by 1 and is -1 or +1 for classes

      Linear separability is not needed and a "best" answer will still be returned.

    R   R   (
   R#   RA   R'   R,   t   onesR)   t   pinvR;   R*   R    (   R   R   R   R   RE   R   R   (    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt   mseÂ   s    c         C   sÁ  t  |  |  } t |  } d } d } | j   } | t j j |  } t |  | | d | }	 d }
 x7|
 | k  r |
 d 7}
 | j | | d d  f  } d | | |
 | f GH| d k rd }
 | d 7} d | | d | d f GHt |  | | d |	 d | t j	 |  | d f |  | d f d d	 t
 d
 d | | | | d d  f j   } | t j j |  } t   } | d k rPqn  | d | } qj Wt |  | | d |	 d | | S(   sÝ   
    Run the single-sample perceptron algorithm on (X,Y) to learn a linear discriminant. 
    
      Batch Perceptron on D (not normalized)
        for dimension d and n samples
      X is n by d 
      Y is n by 1 and is -1 or +1 for classes
      Assume (linear) separability.

        sum_{y in incorrect} -a'y

      for all x in D (column vector)

       a'x > 0

      no margin

      Basic assumption we have 2D points in a plane, for debugging and visualization.
    i    R   i   Ns/   sample %d, val %0.3f, current count is %d of %ds'   correction iteration %d      %.4f  %.4fR   t   yoR   R   g      ,@t   q(   R#   RA   R!   R'   R)   R*   R    R;   R   R   R   t   squeezeRB   (   R   R   RC   RD   R   RE   R   RF   R   R   t   countt   valt   s(    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt   ssPerceptronŰ   s4    

3$	c      
   C   s  t  |  |  } t |  } d } d } | j   }	 |	 t j j |	  }	 t |  | |	 d | d | }
 d } x}| | k  rě| d 7} |	 j | | d d  f  } d | | | | f GH| | k rŰd } | d 7} d | |	 d |	 d f GHt |  | |	 d |
 d | d | t j	 |  | d f |  | d f d	 d
 t
 d d | | t j t j j | | d d  f j    d  } |	 | | | | d d  f j   }	 |	 t j j |	  }	 t   } | d k rŰPqŰn  | d | } qp Wt |  | |	 d |
 d | d | |	 S(   s  
    Run the single-sample relaxation with margin procedure.

      Single-Sample Relaxation with the margin on D (not normalized)
       for dimension d and n samples
      D is n by d + 1 where the last column of D is
      Y is n by 1 and is -1 or +1 for classes
      Assume separability with the margin

        1/2 sum_{y in incorrect} (a'y - b)^2 / ||y||

      for all x in D (column vector)

    i    R   R   i   Ns/   sample %d, val %0.3f, current count is %d of %ds'   correction iteration %d      %.4f  %.4fR   RK   R   R   g      ,@i   RL   (   R#   RA   R!   R'   R)   R*   R    R;   R   R   R   t   powerRM   RB   (   R   R   RC   RD   R   R   RE   R   RF   R   R   RN   RO   t   distRP   (    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt   ssRelaxation  s6    

"3<(	"(   R@   R   t   syst   numpyR'   t   matplotlib.pyplott   pyplotR   R   R    R#   R   t   objectR6   RG   RJ   RQ   RT   (    (    (    s4   /home/csefaculty/jcorso/555code/code/prpy/lindisc.pyt   <module>   s   (	(	5		@