{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Source: [10 Minutes to pandas](http://pandas.pydata.org/pandas-docs/stable/10min.html)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# **10 Minutes to pandas plus Geopandas Example**\n", "\n", "This is a short introduction to pandas, geared mainly for new users. You can see more complex recipes in the [Cookbook](http://pandas.pydata.org/pandas-docs/stable/cookbook.html#cookbook). It is taken from the official pandas documentation.\n", "\n", "I have added a geopandas example at the end to show what Python can do quite easily." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# The Main Tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Customarily, we import as follows:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# tools for data analysis\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# general python tools\n", "import requests, zipfile, io, os\n", "\n", "#plotting packages\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# working with geodata\n", "from shapely.geometry import Point\n", "import geopandas as gpd # geopandas!" 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Object Creation\n", "\n", "See the [Data Structure Intro section](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dsintro)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a Series by passing a list of values, letting pandas create a default integer index:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [], "source": [ "s = pd.Series([1,3,5,np.nan,6,8])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 3.0\n", "1 9.0\n", "2 15.0\n", "3 NaN\n", "4 18.0\n", "5 24.0\n", "dtype: float64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s*3" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [], "source": [ "s+2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a DataFrame by passing a NumPy array of random values (so your numbers will differ from the output shown), with a datetime index and labeled columns:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [], "source": [ "dates = pd.date_range('20130101', periods=6)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [], "source": [ "dates" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [], "source": [ "df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [ { 
"data": { "text/html": [ "
\n", " | A | \n", "B | \n", "C | \n", "D | \n", "
---|---|---|---|---|
2013-01-01 | \n", "-0.351095 | \n", "-1.412039 | \n", "0.801410 | \n", "-0.493340 | \n", "
2013-01-02 | \n", "-1.594646 | \n", "0.573616 | \n", "0.650687 | \n", "-0.337713 | \n", "
2013-01-03 | \n", "0.582110 | \n", "0.777010 | \n", "0.253469 | \n", "0.077428 | \n", "
2013-01-04 | \n", "-0.923868 | \n", "0.960250 | \n", "-2.035741 | \n", "0.061624 | \n", "
2013-01-05 | \n", "-0.205958 | \n", "-0.079455 | \n", "-1.051752 | \n", "1.288812 | \n", "
2013-01-06 | \n", "1.950005 | \n", "-0.760363 | \n", "0.975510 | \n", "-0.984661 | \n", "