ANOVA

Analysis of Variance models

Examples

In [1]: import statsmodels.api as sm

In [2]: from statsmodels.formula.api import ols

In [3]: moore = sm.datasets.get_rdataset("Moore", "car",
   ...:                                  cache=True) # load data
   ...: 
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/usr/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/usr/lib/python3.6/http/client.py in connect(self)
   1391 
-> 1392             super().connect()
   1393 

/usr/lib/python3.6/http/client.py in connect(self)
    935         self.sock = self._create_connection(
--> 936             (self.host,self.port), self.timeout, self.source_address)
    937         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    703     err = None
--> 704     for res in getaddrinfo(host, port, 0, SOCK_STREAM):
    705         af, socktype, proto, canonname, sa = res

/usr/lib/python3.6/socket.py in getaddrinfo(host, port, family, type, proto, flags)
    744     addrlist = []
--> 745     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    746         af, socktype, proto, canonname, sa = res

gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-3-5b46e3f60387> in <module>()
      1 moore = sm.datasets.get_rdataset("Moore", "car",
----> 2                                  cache=True) # load data

/build/statsmodels-pH_Txj/statsmodels-0.8.0/.pybuild/pythonX.Y_3.6/build/statsmodels/datasets/utils.py in get_rdataset(dataname, package, cache)
    288                      "master/doc/"+package+"/rst/")
    289     cache = _get_cache(cache)
--> 290     data, from_cache = _get_data(data_base_url, dataname, cache)
    291     data = read_csv(data, index_col=0)
    292     data = _maybe_reset_index(data)

/build/statsmodels-pH_Txj/statsmodels-0.8.0/.pybuild/pythonX.Y_3.6/build/statsmodels/datasets/utils.py in _get_data(base_url, dataname, cache, extension)
    219     url = base_url + (dataname + ".%s") % extension
    220     try:
--> 221         data, from_cache = _urlopen_cached(url, cache)
    222     except HTTPError as err:
    223         if '404' in str(err):

/build/statsmodels-pH_Txj/statsmodels-0.8.0/.pybuild/pythonX.Y_3.6/build/statsmodels/datasets/utils.py in _urlopen_cached(url, cache)
    210     # not using the cache or didn't find it in cache
    211     if not from_cache:
--> 212         data = urlopen(url).read()
    213         if cache is not None:  # then put it in the cache
    214             _cache_it(data, cache_path)

/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/usr/lib/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/usr/lib/python3.6/urllib/request.py in https_open(self, req)
   1359         def https_open(self, req):
   1360             return self.do_open(http.client.HTTPSConnection, req,
-> 1361                 context=self._context, check_hostname=self._check_hostname)
   1362 
   1363         https_request = AbstractHTTPHandler.do_request_

/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error
-> 1320                 raise URLError(err)
   1321             r = h.getresponse()
   1322         except:

URLError: <urlopen error [Errno -2] Name or service not known>

In [4]: data = moore.data
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-7f7c7245843e> in <module>()
----> 1 data = moore.data

NameError: name 'moore' is not defined

In [5]: data = data.rename(columns={"partner.status":
   ...:                             "partner_status"}) # make name pythonic
   ...: 
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-8a6860e82653> in <module>()
----> 1 data = data.rename(columns={"partner.status":
      2                             "partner_status"}) # make name pythonic
      3 

NameError: name 'data' is not defined

In [6]: moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
   ...:                 data=data).fit()
   ...: 
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-1a5fe75a6b5a> in <module>()
      1 moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
----> 2                 data=data).fit()

NameError: name 'data' is not defined

In [7]: table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 ANOVA DataFrame
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-e1cc32cde8d3> in <module>()
----> 1 table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 ANOVA DataFrame

NameError: name 'moore_lm' is not defined

In [8]: print(table)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-2e062ec15c20> in <module>()
----> 1 print(table)

NameError: name 'table' is not defined

A more detailed example can be found here:

Module Reference

anova_lm(*args, **kwargs) ANOVA table for one or more fitted linear models.