{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Essential methods\n",
    "\n",
    "This introduction to pandas is based on the [official pandas documentation](https://pandas.pydata.org/docs/user_guide/10min.html). It contains information from the first steps in pandas tutorial but also covers more advanced topics.\n",
    "\n",
    "\n",
    "Customarily, we import as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Object creation\n",
    "\n",
    "Creating a [Series](https://pandas.pydata.org/docs/reference/api/pandas.Series.html#pandas.Series) by passing a list of values, letting pandas create a default integer index:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    3.0\n",
       "2    5.0\n",
       "3    NaN\n",
       "4    6.0\n",
       "5    8.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series([1, 3, 5, np.nan, 6, 8])\n",
    "\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Creating a [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html#pandas.DataFrame) by passing a NumPy array, with a datetime index and labeled columns:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',\n",
       "               '2021-01-05', '2021-01-06'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dates = pd.date_range(\"20210101\", periods=6)\n",
    "\n",
    "dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-01 -1.224629 -0.479631 -0.182438  0.062465\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list(\"ABCD\"))\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a simple DataFrame:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_height = pd.DataFrame(\n",
    "    { \n",
    "        'name': pd.Categorical([ \"Tom\", \"Marc\", \"Peter\"]),\n",
    "        'height': np.array([168, 193, 172],dtype='int32'),\n",
    "        'gender': 'male' })"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Creating a DataFrame by passing a dict of objects that can be converted to series-like."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>test</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>train</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>test</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2013-01-02</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>train</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A          B    C  D      E    F\n",
       "0  1.0 2013-01-02  1.0  3   test  foo\n",
       "1  1.0 2013-01-02  1.0  3  train  foo\n",
       "2  1.0 2013-01-02  1.0  3   test  foo\n",
       "3  1.0 2013-01-02  1.0  3  train  foo"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = pd.DataFrame(\n",
    "    {\n",
    "        \"A\": 1.0,\n",
    "        \"B\": pd.Timestamp(\"20130102\"),\n",
    "        \"C\": pd.Series(1, index=list(range(4)), dtype=\"float32\"),\n",
    "        \"D\": np.array([3] * 4, dtype=\"int32\"),\n",
    "        \"E\": pd.Categorical([\"test\", \"train\", \"test\", \"train\"]),\n",
    "        \"F\": \"foo\",\n",
    "    }\n",
    ")\n",
    "\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The columns of the resulting DataFrame have different [dtypes](https://pandas.pydata.org/docs/user_guide/basics.html#basics-dtypes)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A           float64\n",
       "B    datetime64[ns]\n",
       "C           float32\n",
       "D             int32\n",
       "E          category\n",
       "F            object\n",
       "dtype: object"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you’re using IPython, tab completion for column names (as well as public attributes) is automatically enabled. Here’s a subset of the attributes that will be completed:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df2.<TAB>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Viewing data\n",
    "\n",
    "Here is how to view the top and bottom rows of the frame:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-01 -1.224629 -0.479631 -0.182438  0.062465\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-01 -1.224629 -0.479631 -0.182438  0.062465\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127"
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "DatetimeIndex: 6 entries, 2021-01-01 to 2021-01-06\n",
      "Freq: D\n",
      "Data columns (total 4 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   A       6 non-null      float64\n",
      " 1   B       6 non-null      float64\n",
      " 2   C       6 non-null      float64\n",
      " 3   D       6 non-null      float64\n",
      "dtypes: float64(4)\n",
      "memory usage: 240.0 bytes\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',\n",
       "               '2021-01-05', '2021-01-06'],\n",
       "              dtype='datetime64[ns]', freq='D')"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['A', 'B', 'C', 'D'], dtype='object')"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[describe()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.describe.html#pandas.DataFrame.describe) shows a quick statistic summary of your data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>-0.778502</td>\n",
       "      <td>0.311938</td>\n",
       "      <td>0.121565</td>\n",
       "      <td>-0.462559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.362957</td>\n",
       "      <td>1.157215</td>\n",
       "      <td>0.656764</td>\n",
       "      <td>0.943086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-2.238874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-1.712933</td>\n",
       "      <td>-0.683063</td>\n",
       "      <td>-0.302247</td>\n",
       "      <td>-0.606256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>-0.934632</td>\n",
       "      <td>0.276194</td>\n",
       "      <td>-0.154047</td>\n",
       "      <td>-0.123589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>-0.396269</td>\n",
       "      <td>1.094894</td>\n",
       "      <td>0.517794</td>\n",
       "      <td>0.072587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              A         B         C         D\n",
       "count  6.000000  6.000000  6.000000  6.000000\n",
       "mean  -0.778502  0.311938  0.121565 -0.462559\n",
       "std    1.362957  1.157215  0.656764  0.943086\n",
       "min   -2.204339 -0.886727 -0.492826 -2.238874\n",
       "25%   -1.712933 -0.683063 -0.302247 -0.606256\n",
       "50%   -0.934632  0.276194 -0.154047 -0.123589\n",
       "75%   -0.396269  1.094894  0.517794  0.072587\n",
       "max    1.591774  1.840986  1.140217  0.339868"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Transposing your data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>2021-01-01</th>\n",
       "      <th>2021-01-02</th>\n",
       "      <th>2021-01-03</th>\n",
       "      <th>2021-01-04</th>\n",
       "      <th>2021-01-05</th>\n",
       "      <th>2021-01-06</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.644635</td>\n",
       "      <td>-0.313480</td>\n",
       "      <td>1.591774</td>\n",
       "      <td>-1.875701</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>1.032019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>-0.182438</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-0.492826</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>0.062465</td>\n",
       "      <td>-0.309644</td>\n",
       "      <td>0.075960</td>\n",
       "      <td>0.339868</td>\n",
       "      <td>-2.238874</td>\n",
       "      <td>-0.705127</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   2021-01-01  2021-01-02  2021-01-03  2021-01-04  2021-01-05  2021-01-06\n",
       "A   -1.224629   -2.204339   -0.644635   -0.313480    1.591774   -1.875701\n",
       "B   -0.479631   -0.750873    1.115852   -0.886727    1.840986    1.032019\n",
       "C   -0.182438   -0.342183    0.732277    1.140217   -0.125656   -0.492826\n",
       "D    0.062465   -0.309644    0.075960    0.339868   -2.238874   -0.705127"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sorting by an axis:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>D</th>\n",
       "      <th>C</th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.062465</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-1.224629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-0.309644</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-2.204339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>0.075960</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>-0.644635</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>0.339868</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>-0.313480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>-2.238874</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>1.591774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-0.705127</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-1.875701</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   D         C         B         A\n",
       "2021-01-01  0.062465 -0.182438 -0.479631 -1.224629\n",
       "2021-01-02 -0.309644 -0.342183 -0.750873 -2.204339\n",
       "2021-01-03  0.075960  0.732277  1.115852 -0.644635\n",
       "2021-01-04  0.339868  1.140217 -0.886727 -0.313480\n",
       "2021-01-05 -2.238874 -0.125656  1.840986  1.591774\n",
       "2021-01-06 -0.705127 -0.492826  1.032019 -1.875701"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_index(axis=1, ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sorting by values:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644\n",
       "2021-01-01 -1.224629 -0.479631 -0.182438  0.062465\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874"
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(by=\"B\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Selection\n",
    "\n",
    "While standard Python / NumPy expressions for selecting and setting are intuitive and come in handy for interactive work, for production code, we recommend the optimized pandas data access methods, .at, .iat, .loc and .iloc.\n",
    "\n",
    "### Getting\n",
    "Selecting a single column, which yields a Series, equivalent to `df.A`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2021-01-01   -1.224629\n",
       "2021-01-02   -2.204339\n",
       "2021-01-03   -0.644635\n",
       "2021-01-04   -0.313480\n",
       "2021-01-05    1.591774\n",
       "2021-01-06   -1.875701\n",
       "Freq: D, Name: A, dtype: float64"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"A\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Selecting via [], which slices the rows.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-01 -1.224629 -0.479631 -0.182438  0.062465\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[0:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-01 -1.224629 -0.479631 -0.182438  0.062465\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960"
      ]
     },
     "execution_count": 159,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"20210101\":\"20210103\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Selection by label\n",
    "\n",
    "\n",
    "\n",
    "The `.loc` attribute is the primary access method. The following are valid inputs:\n",
    "\n",
    "- A single label, e.g. 5 or 'a' (Note that 5 is interpreted as a label of the index. This use is not an integer position along the index.).\n",
    "\n",
    "- A list or array of labels ['a', 'b', 'c'].\n",
    "\n",
    "- A slice object with labels 'a':'f' (Note that contrary to usual Python slices, both the start and the stop are included, when present in the index! See Slicing with labels.\n",
    "\n",
    "- A boolean array.\n",
    "\n",
    "- A callable, see [Selection By Callable](https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-callable).\n",
    "\n",
    "\n",
    "For getting a cross section using a label:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A   -1.224629\n",
       "B   -0.479631\n",
       "C   -0.182438\n",
       "D    0.062465\n",
       "Name: 2021-01-01 00:00:00, dtype: float64"
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[dates[0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Selecting on a multi-axis by label:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-1.224629</td>\n",
       "      <td>-0.479631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "2021-01-01 -1.224629 -0.479631\n",
       "2021-01-02 -2.204339 -0.750873\n",
       "2021-01-03 -0.644635  1.115852\n",
       "2021-01-04 -0.313480 -0.886727\n",
       "2021-01-05  1.591774  1.840986\n",
       "2021-01-06 -1.875701  1.032019"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[ : , [\"A\", \"B\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Showing label slicing, both endpoints are included:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "2021-01-02 -2.204339 -0.750873\n",
       "2021-01-03 -0.644635  1.115852\n",
       "2021-01-04 -0.313480 -0.886727"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[\"20210102\":\"20210104\", [\"A\", \"B\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Reduction in the dimensions of the returned object:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A   -2.204339\n",
       "B   -0.750873\n",
       "Name: 2021-01-02 00:00:00, dtype: float64"
      ]
     },
     "execution_count": 163,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[\"20210102\", [\"A\", \"B\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For getting a scalar value:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.2246288596124404"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[dates[0], \"A\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For getting fast access to a scalar (equivalent to the prior method) use `.at`.\n",
    "Use at if you only need to get or set a single value in a DataFrame or Series."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.2246288596124404"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.at[dates[0], \"A\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set value at specified row/column pair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2021-01-01    1.000000\n",
       "2021-01-02   -2.204339\n",
       "2021-01-03   -0.644635\n",
       "2021-01-04   -0.313480\n",
       "2021-01-05    1.591774\n",
       "2021-01-06   -1.875701\n",
       "Freq: D, Name: A, dtype: float64"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.at[dates[0], \"A\"] = 1\n",
    "\n",
    "df.A"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Selection by position\n",
    "\n",
    "\n",
    "pandas provides a suite of methods in order to get purely integer based indexing. \n",
    "\n",
    "The .iloc attribute is the primary access method. The following are valid inputs:\n",
    "\n",
    "- An integer e.g. 5.\n",
    "\n",
    "- A list or array of integers [4, 3, 0].\n",
    "\n",
    "- A slice object with ints 1:7.\n",
    "\n",
    "- A boolean array.\n",
    "\n",
    "- A callable, see [Selection By Callable](https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-callable).\n",
    "\n",
    "See more in [Selection by Position](https://pandas.pydata.org/docs/user_guide/indexing.html#indexing-integer)\n",
    "\n",
    "\n",
    "Select via the position of the passed integers:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    1.000000\n",
       "B   -0.479631\n",
       "C   -0.182438\n",
       "D    0.062465\n",
       "Name: 2021-01-01 00:00:00, dtype: float64"
      ]
     },
     "execution_count": 167,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By integer slices, acting similar to NumPy/Python:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.479631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "2021-01-01  1.000000 -0.479631\n",
       "2021-01-02 -2.204339 -0.750873\n",
       "2021-01-03 -0.644635  1.115852"
      ]
     },
     "execution_count": 168,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[0:3, 0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B\n",
       "2021-01-02 -2.204339 -0.750873\n",
       "2021-01-03 -0.644635  1.115852"
      ]
     },
     "execution_count": 169,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[1:3, 0:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By lists of integer position locations, similar to the NumPy/Python style:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.342183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>0.732277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>-0.125656</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         C\n",
       "2021-01-02 -2.204339 -0.342183\n",
       "2021-01-03 -0.644635  0.732277\n",
       "2021-01-05  1.591774 -0.125656"
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[[1, 2, 4], [0, 2]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For slicing rows explicitly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[1:3, :]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For slicing columns explicitly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   B         C\n",
       "2021-01-01 -0.479631 -0.182438\n",
       "2021-01-02 -0.750873 -0.342183\n",
       "2021-01-03  1.115852  0.732277\n",
       "2021-01-04 -0.886727  1.140217\n",
       "2021-01-05  1.840986 -0.125656\n",
       "2021-01-06  1.032019 -0.492826"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[:, 1:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For getting a value explicitly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[0, 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For getting fast access to a scalar (equivalent to the prior method):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 174,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iat[0, 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Boolean indexing\n",
    "\n",
    "Boolean indexing is used to filter subsets of data based on actual values inside the DataFrame. It is called [Boolean](https://en.wikipedia.org/wiki/Boolean_data_type) since the filter condition is associated with one of two possible values (usually denoted `True` and `False`)\n",
    "\n",
    "Using a single column’s values to select data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-01  1.000000 -0.479631 -0.182438  0.062465\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df[\"A\"] > 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Selecting values from a DataFrame where a boolean condition is met."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.062465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D\n",
       "2021-01-01  1.000000       NaN       NaN  0.062465\n",
       "2021-01-02       NaN       NaN       NaN       NaN\n",
       "2021-01-03       NaN  1.115852  0.732277  0.075960\n",
       "2021-01-04       NaN       NaN  1.140217  0.339868\n",
       "2021-01-05  1.591774  1.840986       NaN       NaN\n",
       "2021-01-06       NaN  1.032019       NaN       NaN"
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df > 0]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's make a copy of the `df2` object’s indices and data. We use the default `deep=True` to create a new object with a copy of the calling object’s data and indices. Modifications to the data or indices of the copy will not be reflected in the original object. Note that a shallow copy (`deep=False`) shares data and index with original."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_copy = df.copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a new column in the DataFrame:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_copy[\"E\"] = [\"one\", \"one\", \"two\", \"three\", \"four\", \"three\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "      <td>three</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "      <td>four</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "      <td>three</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D      E\n",
       "2021-01-01  1.000000 -0.479631 -0.182438  0.062465    one\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644    one\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960    two\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868  three\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874   four\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127  three"
      ]
     },
     "execution_count": 179,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_copy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using the [isin()](https://pandas.pydata.org/docs/reference/api/pandas.Series.isin.html#pandas.Series.isin) method for filtering:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "      <td>four</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D     E\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960   two\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874  four"
      ]
     },
     "execution_count": 180,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_copy[df_copy[\"E\"].isin([\"two\", \"four\"])]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setting\n",
    "\n",
    "Setting a new column automatically aligns the data by the indexes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [],
   "source": [
    "s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range(\"20210102\", periods=6))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2021-01-02    1\n",
       "2021-01-03    2\n",
       "2021-01-04    3\n",
       "2021-01-05    4\n",
       "2021-01-06    5\n",
       "2021-01-07    6\n",
       "Freq: D, dtype: int64"
      ]
     },
     "execution_count": 182,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"F\"] = s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D    F\n",
       "2021-01-01  1.000000 -0.479631 -0.182438  0.062465  NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644  1.0\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960  2.0\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868  3.0\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874  4.0\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127  5.0"
      ]
     },
     "execution_count": 184,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Setting values by label:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.at[dates[0], \"A\"] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.479631</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D    F\n",
       "2021-01-01  0.000000 -0.479631 -0.182438  0.062465  NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644  1.0\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960  2.0\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868  3.0\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874  4.0\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127  5.0"
      ]
     },
     "execution_count": 186,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Setting values by position:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.iat[0, 1] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>0.062465</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-0.309644</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>0.075960</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>0.339868</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-2.238874</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-0.705127</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C         D    F\n",
       "2021-01-01  0.000000  0.000000 -0.182438  0.062465  NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -0.309644  1.0\n",
       "2021-01-03 -0.644635  1.115852  0.732277  0.075960  2.0\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  0.339868  3.0\n",
       "2021-01-05  1.591774  1.840986 -0.125656 -2.238874  4.0\n",
       "2021-01-06 -1.875701  1.032019 -0.492826 -0.705127  5.0"
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Setting by assigning with a NumPy array:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.loc[:, \"D\"] = np.array([5] * len(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>5</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F\n",
       "2021-01-01  0.000000  0.000000 -0.182438  5  NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183  5  1.0\n",
       "2021-01-03 -0.644635  1.115852  0.732277  5  2.0\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  5  3.0\n",
       "2021-01-05  1.591774  1.840986 -0.125656  5  4.0\n",
       "2021-01-06 -1.875701  1.032019 -0.492826  5  5.0"
      ]
     },
     "execution_count": 190,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A where operation with setting."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "metadata": {},
   "outputs": [],
   "source": [
    "df3 = df.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "metadata": {},
   "outputs": [],
   "source": [
    "df3[df3 > 0]= -df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>-5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>-5</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>-1.115852</td>\n",
       "      <td>-0.732277</td>\n",
       "      <td>-5</td>\n",
       "      <td>-2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>-1.140217</td>\n",
       "      <td>-5</td>\n",
       "      <td>-3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>-1.591774</td>\n",
       "      <td>-1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>-5</td>\n",
       "      <td>-4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>-1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>-5</td>\n",
       "      <td>-5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F\n",
       "2021-01-01  0.000000  0.000000 -0.182438 -5  NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183 -5 -1.0\n",
       "2021-01-03 -0.644635 -1.115852 -0.732277 -5 -2.0\n",
       "2021-01-04 -0.313480 -0.886727 -1.140217 -5 -3.0\n",
       "2021-01-05 -1.591774 -1.840986 -0.125656 -5 -4.0\n",
       "2021-01-06 -1.875701 -1.032019 -0.492826 -5 -5.0"
      ]
     },
     "execution_count": 193,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Missing data\n",
    "\n",
    "pandas primarily uses the value np.nan to represent missing data. It is by default not included in computations. See the [Missing Data section](https://pandas.pydata.org/docs/user_guide/missing_data.html#missing-data) for details.\n",
    "\n",
    "Reindexing allows you to change/add/delete the index on a specified axis. This returns a copy of the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4 = df.reindex(index=dates[0:4], columns=list(df.columns) + [\"E\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F   E\n",
       "2021-01-01  0.000000  0.000000 -0.182438  5  NaN NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183  5  1.0 NaN\n",
       "2021-01-03 -0.644635  1.115852  0.732277  5  2.0 NaN\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  5  3.0 NaN"
      ]
     },
     "execution_count": 195,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.loc[dates[0] : dates[1], \"E\"] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F    E\n",
       "2021-01-01  0.000000  0.000000 -0.182438  5  NaN  1.0\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183  5  1.0  1.0\n",
       "2021-01-03 -0.644635  1.115852  0.732277  5  2.0  NaN\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  5  3.0  NaN"
      ]
     },
     "execution_count": 197,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To drop any rows that have missing data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F    E\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183  5  1.0  1.0"
      ]
     },
     "execution_count": 198,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4.dropna(how=\"any\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Filling missing data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F    E\n",
       "2021-01-01  0.000000  0.000000 -0.182438  5  5.0  1.0\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183  5  1.0  1.0\n",
       "2021-01-03 -0.644635  1.115852  0.732277  5  2.0  5.0\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  5  3.0  5.0"
      ]
     },
     "execution_count": 199,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4.fillna(value=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To get the boolean mask where values are nan."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                A      B      C      D      F      E\n",
       "2021-01-01  False  False  False  False   True  False\n",
       "2021-01-02  False  False  False  False  False  False\n",
       "2021-01-03  False  False  False  False  False   True\n",
       "2021-01-04  False  False  False  False  False   True"
      ]
     },
     "execution_count": 200,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.isna(df4)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Operations\n",
    "\n",
    "Note that operations in general exclude missing data.\n",
    "\n",
    "### Statistics\n",
    "\n",
    "Performing a descriptive statistic:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    float64\n",
       "B    float64\n",
       "C    float64\n",
       "D      int64\n",
       "F    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 201,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A   -0.574397\n",
       "B    0.391876\n",
       "C    0.121565\n",
       "D    5.000000\n",
       "F    3.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 202,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Same operation on the other axis:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2021-01-01    1.204390\n",
       "2021-01-02    0.540521\n",
       "2021-01-03    1.640699\n",
       "2021-01-04    1.588002\n",
       "2021-01-05    2.461421\n",
       "2021-01-06    1.732698\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 203,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.mean(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Operating with objects that have different dimensionality and need alignment. In addition, pandas automatically broadcasts along the specified dimension.\n",
    "\n",
    "Let’s use the shift() function to shift the index axis by 2 periods in the positive direction (i.e., create 2021-01-05 and 2021-01-06):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "metadata": {},
   "outputs": [],
   "source": [
    "s2 = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 205,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2021-01-01    NaN\n",
       "2021-01-02    NaN\n",
       "2021-01-03    1.0\n",
       "2021-01-04    3.0\n",
       "2021-01-05    5.0\n",
       "2021-01-06    NaN\n",
       "Freq: D, dtype: float64"
      ]
     },
     "execution_count": 205,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can use the `sub()` function to subtract each element of a DataFrame with a corresponding element in a series. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.342183</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-0.644635</td>\n",
       "      <td>1.115852</td>\n",
       "      <td>0.732277</td>\n",
       "      <td>5</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-0.313480</td>\n",
       "      <td>-0.886727</td>\n",
       "      <td>1.140217</td>\n",
       "      <td>5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>1.591774</td>\n",
       "      <td>1.840986</td>\n",
       "      <td>-0.125656</td>\n",
       "      <td>5</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-1.875701</td>\n",
       "      <td>1.032019</td>\n",
       "      <td>-0.492826</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C  D    F\n",
       "2021-01-01  0.000000  0.000000 -0.182438  5  NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.342183  5  1.0\n",
       "2021-01-03 -0.644635  1.115852  0.732277  5  2.0\n",
       "2021-01-04 -0.313480 -0.886727  1.140217  5  3.0\n",
       "2021-01-05  1.591774  1.840986 -0.125656  5  4.0\n",
       "2021-01-06 -1.875701  1.032019 -0.492826  5  5.0"
      ]
     },
     "execution_count": 206,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-1.644635</td>\n",
       "      <td>0.115852</td>\n",
       "      <td>-0.267723</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-3.313480</td>\n",
       "      <td>-3.886727</td>\n",
       "      <td>-1.859783</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>-3.408226</td>\n",
       "      <td>-3.159014</td>\n",
       "      <td>-5.125656</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C    D    F\n",
       "2021-01-01       NaN       NaN       NaN  NaN  NaN\n",
       "2021-01-02       NaN       NaN       NaN  NaN  NaN\n",
       "2021-01-03 -1.644635  0.115852 -0.267723  4.0  1.0\n",
       "2021-01-04 -3.313480 -3.886727 -1.859783  2.0  0.0\n",
       "2021-01-05 -3.408226 -3.159014 -5.125656  0.0 -1.0\n",
       "2021-01-06       NaN       NaN       NaN  NaN  NaN"
      ]
     },
     "execution_count": 207,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sub(s2, axis=\"index\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Apply\n",
    "\n",
    "Applying functions to data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2021-01-01</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.182438</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-02</th>\n",
       "      <td>-2.204339</td>\n",
       "      <td>-0.750873</td>\n",
       "      <td>-0.524621</td>\n",
       "      <td>10</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-03</th>\n",
       "      <td>-2.848974</td>\n",
       "      <td>0.364979</td>\n",
       "      <td>0.207656</td>\n",
       "      <td>15</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-04</th>\n",
       "      <td>-3.162453</td>\n",
       "      <td>-0.521748</td>\n",
       "      <td>1.347873</td>\n",
       "      <td>20</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-05</th>\n",
       "      <td>-1.570680</td>\n",
       "      <td>1.319237</td>\n",
       "      <td>1.222217</td>\n",
       "      <td>25</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2021-01-06</th>\n",
       "      <td>-3.446381</td>\n",
       "      <td>2.351256</td>\n",
       "      <td>0.729391</td>\n",
       "      <td>30</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   A         B         C   D     F\n",
       "2021-01-01  0.000000  0.000000 -0.182438   5   NaN\n",
       "2021-01-02 -2.204339 -0.750873 -0.524621  10   1.0\n",
       "2021-01-03 -2.848974  0.364979  0.207656  15   3.0\n",
       "2021-01-04 -3.162453 -0.521748  1.347873  20   6.0\n",
       "2021-01-05 -1.570680  1.319237  1.222217  25  10.0\n",
       "2021-01-06 -3.446381  2.351256  0.729391  30  15.0"
      ]
     },
     "execution_count": 208,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.apply(np.cumsum)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A lambda function is a small anonymous function. It can take any number of arguments, but can only have one expression:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    3.796112\n",
       "B    2.727713\n",
       "C    1.633043\n",
       "D    0.000000\n",
       "F    4.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 209,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.apply(lambda x: x.max() - x.min())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Value counts (histogramming)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    3\n",
       "1    0\n",
       "2    1\n",
       "3    1\n",
       "4    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 210,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s3 = pd.Series(np.random.randint(0, 4, size=5))\n",
    "\n",
    "s3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    2\n",
       "3    1\n",
       "0    1\n",
       "2    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 211,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s3.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## String methods\n",
    "\n",
    "Series is equipped with a set of string processing methods in the str attribute that make it easy to operate on each element of the array, as in the code snippet below. Note that pattern-matching in str generally uses [regular expressions](https://docs.python.org/3/library/re.html) by default (and in some cases always uses them)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "metadata": {},
   "outputs": [],
   "source": [
    "s4 = pd.Series([\"A\", \"B\", \"C\", \"Aaba\", \"Baca\", np.nan, \"CABA\", \"dog\", \"cat\"])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       A\n",
       "1       B\n",
       "2       C\n",
       "3    Aaba\n",
       "4    Baca\n",
       "5     NaN\n",
       "6    CABA\n",
       "7     dog\n",
       "8     cat\n",
       "dtype: object"
      ]
     },
     "execution_count": 213,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       a\n",
       "1       b\n",
       "2       c\n",
       "3    aaba\n",
       "4    baca\n",
       "5     NaN\n",
       "6    caba\n",
       "7     dog\n",
       "8     cat\n",
       "dtype: object"
      ]
     },
     "execution_count": 214,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s4.str.lower()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Merge\n",
    "\n",
    "### Concat\n",
    "\n",
    "pandas provides various facilities for easily combining together Series and DataFrame objects with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations.\n",
    "\n",
    "Concatenating pandas objects together with [concat()](https://pandas.pydata.org/docs/reference/api/pandas.concat.html#pandas.concat):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {},
   "outputs": [],
   "source": [
    "df5 = pd.DataFrame(np.random.randn(10, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.331507</td>\n",
       "      <td>-0.712894</td>\n",
       "      <td>0.864198</td>\n",
       "      <td>2.402081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.231050</td>\n",
       "      <td>-1.151274</td>\n",
       "      <td>-0.612431</td>\n",
       "      <td>-1.093598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.347278</td>\n",
       "      <td>2.007451</td>\n",
       "      <td>-0.950586</td>\n",
       "      <td>1.501542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.023453</td>\n",
       "      <td>1.495769</td>\n",
       "      <td>-1.552427</td>\n",
       "      <td>0.318594</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.097536</td>\n",
       "      <td>3.403314</td>\n",
       "      <td>-0.649650</td>\n",
       "      <td>-0.076354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.463055</td>\n",
       "      <td>0.119990</td>\n",
       "      <td>-0.016359</td>\n",
       "      <td>1.303239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-0.204622</td>\n",
       "      <td>-0.235922</td>\n",
       "      <td>0.766144</td>\n",
       "      <td>0.996975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-1.497416</td>\n",
       "      <td>0.790123</td>\n",
       "      <td>-0.017615</td>\n",
       "      <td>0.466254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-1.363141</td>\n",
       "      <td>1.701461</td>\n",
       "      <td>-1.006705</td>\n",
       "      <td>-0.354086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>-0.330299</td>\n",
       "      <td>0.721074</td>\n",
       "      <td>1.240544</td>\n",
       "      <td>-0.531562</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3\n",
       "0 -0.331507 -0.712894  0.864198  2.402081\n",
       "1 -1.231050 -1.151274 -0.612431 -1.093598\n",
       "2 -0.347278  2.007451 -0.950586  1.501542\n",
       "3  0.023453  1.495769 -1.552427  0.318594\n",
       "4  1.097536  3.403314 -0.649650 -0.076354\n",
       "5 -0.463055  0.119990 -0.016359  1.303239\n",
       "6 -0.204622 -0.235922  0.766144  0.996975\n",
       "7 -1.497416  0.790123 -0.017615  0.466254\n",
       "8 -1.363141  1.701461 -1.006705 -0.354086\n",
       "9 -0.330299  0.721074  1.240544 -0.531562"
      ]
     },
     "execution_count": 216,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[          0         1         2         3\n",
       " 0 -0.331507 -0.712894  0.864198  2.402081\n",
       " 1 -1.231050 -1.151274 -0.612431 -1.093598\n",
       " 2 -0.347278  2.007451 -0.950586  1.501542,\n",
       "           0         1         2         3\n",
       " 3  0.023453  1.495769 -1.552427  0.318594\n",
       " 4  1.097536  3.403314 -0.649650 -0.076354\n",
       " 5 -0.463055  0.119990 -0.016359  1.303239\n",
       " 6 -0.204622 -0.235922  0.766144  0.996975,\n",
       "           0         1         2         3\n",
       " 7 -1.497416  0.790123 -0.017615  0.466254\n",
       " 8 -1.363141  1.701461 -1.006705 -0.354086\n",
       " 9 -0.330299  0.721074  1.240544 -0.531562]"
      ]
     },
     "execution_count": 217,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# break it into pieces\n",
    "pieces = [df5[:3], df5[3:7], df5[7:]]\n",
    "\n",
    "pieces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 218,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.331507</td>\n",
       "      <td>-0.712894</td>\n",
       "      <td>0.864198</td>\n",
       "      <td>2.402081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.231050</td>\n",
       "      <td>-1.151274</td>\n",
       "      <td>-0.612431</td>\n",
       "      <td>-1.093598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.347278</td>\n",
       "      <td>2.007451</td>\n",
       "      <td>-0.950586</td>\n",
       "      <td>1.501542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.023453</td>\n",
       "      <td>1.495769</td>\n",
       "      <td>-1.552427</td>\n",
       "      <td>0.318594</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.097536</td>\n",
       "      <td>3.403314</td>\n",
       "      <td>-0.649650</td>\n",
       "      <td>-0.076354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.463055</td>\n",
       "      <td>0.119990</td>\n",
       "      <td>-0.016359</td>\n",
       "      <td>1.303239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-0.204622</td>\n",
       "      <td>-0.235922</td>\n",
       "      <td>0.766144</td>\n",
       "      <td>0.996975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-1.497416</td>\n",
       "      <td>0.790123</td>\n",
       "      <td>-0.017615</td>\n",
       "      <td>0.466254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-1.363141</td>\n",
       "      <td>1.701461</td>\n",
       "      <td>-1.006705</td>\n",
       "      <td>-0.354086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>-0.330299</td>\n",
       "      <td>0.721074</td>\n",
       "      <td>1.240544</td>\n",
       "      <td>-0.531562</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3\n",
       "0 -0.331507 -0.712894  0.864198  2.402081\n",
       "1 -1.231050 -1.151274 -0.612431 -1.093598\n",
       "2 -0.347278  2.007451 -0.950586  1.501542\n",
       "3  0.023453  1.495769 -1.552427  0.318594\n",
       "4  1.097536  3.403314 -0.649650 -0.076354\n",
       "5 -0.463055  0.119990 -0.016359  1.303239\n",
       "6 -0.204622 -0.235922  0.766144  0.996975\n",
       "7 -1.497416  0.790123 -0.017615  0.466254\n",
       "8 -1.363141  1.701461 -1.006705 -0.354086\n",
       "9 -0.330299  0.721074  1.240544 -0.531562"
      ]
     },
     "execution_count": 218,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(pieces)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Join"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   key  lval\n",
      "0  foo     1\n",
      "1  foo     2\n",
      "---------------\n",
      "   key  rval\n",
      "0  foo     4\n",
      "1  foo     5\n"
     ]
    }
   ],
   "source": [
    "left = pd.DataFrame({\"key\": [\"foo\", \"foo\"], \"lval\": [1, 2]})\n",
    "\n",
    "right = pd.DataFrame({\"key\": [\"foo\", \"foo\"], \"rval\": [4, 5]})\n",
    "\n",
    "print(left)\n",
    "print(\"-\" * 15)\n",
    "print(right)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>lval</th>\n",
       "      <th>rval</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>foo</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>foo</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>foo</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>foo</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   key  lval  rval\n",
       "0  foo     1     4\n",
       "1  foo     1     5\n",
       "2  foo     2     4\n",
       "3  foo     2     5"
      ]
     },
     "execution_count": 220,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, on=\"key\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Another example that can be given is:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   key  lval\n",
      "0  foo     1\n",
      "1  bar     2\n",
      "---------------\n",
      "   key  rval\n",
      "0  foo     4\n",
      "1  bar     5\n"
     ]
    }
   ],
   "source": [
    "left = pd.DataFrame({\"key\": [\"foo\", \"bar\"], \"lval\": [1, 2]})\n",
    "\n",
    "right = pd.DataFrame({\"key\": [\"foo\", \"bar\"], \"rval\": [4, 5]})\n",
    "\n",
    "print(left)\n",
    "print(\"-\" * 15)\n",
    "print(right)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>lval</th>\n",
       "      <th>rval</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>foo</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>bar</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   key  lval  rval\n",
       "0  foo     1     4\n",
       "1  bar     2     5"
      ]
     },
     "execution_count": 222,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, on=\"key\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Grouping\n",
    "\n",
    "By “group by” we are referring to a process involving one or more of the following steps:\n",
    "\n",
    "- **Splitting** the data into groups based on some criteria\n",
    "\n",
    "- **Applying** a function to each group independently\n",
    "\n",
    "- **Combining** the results into a data structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>foo</td>\n",
       "      <td>one</td>\n",
       "      <td>1.192088</td>\n",
       "      <td>0.689021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>bar</td>\n",
       "      <td>one</td>\n",
       "      <td>0.594093</td>\n",
       "      <td>0.040047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>foo</td>\n",
       "      <td>two</td>\n",
       "      <td>-0.847046</td>\n",
       "      <td>0.329965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>bar</td>\n",
       "      <td>three</td>\n",
       "      <td>-0.568368</td>\n",
       "      <td>-1.509250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>foo</td>\n",
       "      <td>two</td>\n",
       "      <td>1.548249</td>\n",
       "      <td>-0.667777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>bar</td>\n",
       "      <td>two</td>\n",
       "      <td>0.251481</td>\n",
       "      <td>0.011822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>foo</td>\n",
       "      <td>one</td>\n",
       "      <td>-0.147668</td>\n",
       "      <td>1.181461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>foo</td>\n",
       "      <td>three</td>\n",
       "      <td>0.838016</td>\n",
       "      <td>1.066788</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A      B         C         D\n",
       "0  foo    one  1.192088  0.689021\n",
       "1  bar    one  0.594093  0.040047\n",
       "2  foo    two -0.847046  0.329965\n",
       "3  bar  three -0.568368 -1.509250\n",
       "4  foo    two  1.548249 -0.667777\n",
       "5  bar    two  0.251481  0.011822\n",
       "6  foo    one -0.147668  1.181461\n",
       "7  foo  three  0.838016  1.066788"
      ]
     },
     "execution_count": 223,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df6 = pd.DataFrame(\n",
    "    {\n",
    "        \"A\": [\"foo\", \"bar\", \"foo\", \"bar\", \"foo\", \"bar\", \"foo\", \"foo\"],\n",
    "        \"B\": [\"one\", \"one\", \"two\", \"three\", \"two\", \"two\", \"one\", \"three\"],\n",
    "        \"C\": np.random.randn(8),\n",
    "        \"D\": np.random.randn(8),\n",
    "    }\n",
    ")\n",
    "\n",
    "df6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Grouping and then applying the sum() function to the resulting groups."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>bar</th>\n",
       "      <td>0.277206</td>\n",
       "      <td>-1.457381</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>foo</th>\n",
       "      <td>2.583640</td>\n",
       "      <td>2.599458</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            C         D\n",
       "A                      \n",
       "bar  0.277206 -1.457381\n",
       "foo  2.583640  2.599458"
      ]
     },
     "execution_count": 224,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df6.groupby(\"A\").sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Grouping by multiple columns forms a hierarchical index, and again we can apply the sum() function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">bar</th>\n",
       "      <th>one</th>\n",
       "      <td>0.594093</td>\n",
       "      <td>0.040047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>-0.568368</td>\n",
       "      <td>-1.509250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>0.251481</td>\n",
       "      <td>0.011822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">foo</th>\n",
       "      <th>one</th>\n",
       "      <td>1.044420</td>\n",
       "      <td>1.870481</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>0.838016</td>\n",
       "      <td>1.066788</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>0.701203</td>\n",
       "      <td>-0.337812</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  C         D\n",
       "A   B                        \n",
       "bar one    0.594093  0.040047\n",
       "    three -0.568368 -1.509250\n",
       "    two    0.251481  0.011822\n",
       "foo one    1.044420  1.870481\n",
       "    three  0.838016  1.066788\n",
       "    two    0.701203 -0.337812"
      ]
     },
     "execution_count": 225,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df6.groupby([\"A\", \"B\"]).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reshaping\n",
    "\n",
    "### Stack"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('bar', 'one'),\n",
       " ('bar', 'two'),\n",
       " ('baz', 'one'),\n",
       " ('baz', 'two'),\n",
       " ('foo', 'one'),\n",
       " ('foo', 'two'),\n",
       " ('qux', 'one'),\n",
       " ('qux', 'two')]"
      ]
     },
     "execution_count": 226,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tuples = list(\n",
    "    zip(\n",
    "        *[\n",
    "            [\"bar\", \"bar\", \"baz\", \"baz\", \"foo\", \"foo\", \"qux\", \"qux\"],\n",
    "            [\"one\", \"two\", \"one\", \"two\", \"one\", \"two\", \"one\", \"two\"],\n",
    "        ]\n",
    "    )\n",
    ")\n",
    "\n",
    "tuples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([('bar', 'one'),\n",
       "            ('bar', 'two'),\n",
       "            ('baz', 'one'),\n",
       "            ('baz', 'two'),\n",
       "            ('foo', 'one'),\n",
       "            ('foo', 'two'),\n",
       "            ('qux', 'one'),\n",
       "            ('qux', 'two')],\n",
       "           names=['first', 'second'])"
      ]
     },
     "execution_count": 227,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index = pd.MultiIndex.from_tuples(tuples, names=[\"first\", \"second\"])\n",
    "\n",
    "index"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "463226f144cc21b006ce6927bfc93dd00694e52c8bc6857abb6e555b983749e9"
  },
  "kernelspec": {
   "display_name": "Python 3.8.2 64-bit ('base': conda)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}