diff --git a/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation.ipynb b/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation.ipynb index e45724e9b33b1a169b74b0a439fccca96ec51184..91ba588b7cbd4b6fc52046eae29aa7dd63b0afe4 100644 --- a/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation.ipynb +++ b/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation.ipynb @@ -25,21 +25,7 @@ "source": [ "# Lecture 35 Pandas 3: Data Transformation\n", "* Data transformation is the process of changing the format, structure, or values of data. \n", - "* Often needed during data cleaning and sometimes during data analysis\n", - "\n", - "Possible data transformation: \n", - "* Parsing/Extraction\n", - " * Parse CSV to Pandas DataFrame\n", - "* Missing Value Manipulations\n", - " * Dropping\n", - " * Imputation: replace missing value with substitute values\n", - "* Typecasting, Formating, Renaming\n", - " * Typecasting: covert one column from int to float \n", - " * Formating: format the time column to datatime object \n", - " * Renaming: rename column and index names \n", - "* Applying/Mapping \n", - "* Filtering, Aggregation, Grouping, and Summarization\n", - " * Covered in Pandas 1 & 2 lectures" + "* Often needed during data cleaning and sometimes during data analysis" ] }, { @@ -50,14 +36,15 @@ "source": [ "# Today's Learning Objectives: \n", "\n", - "* Missing Value Manipulations\n", - " * check, drop, and fill NaN using Pandas .isna, .dropna, and .fillna\n", - "* Applying/Mapping\n", - " * Use .apply on Pandas Series and DataFrame rows/columns \n", - " * Use .replace to replace all target values \n", - "* Filtering, Aggregation, Grouping, and Summarization\n", - " * More .groupby examples \n", - " * Convert .groupby examples to SQL " + "* Setting a column as the index of a pandas `DataFrame`\n", + "* Identify, drop, or fill missing values (`np.nan`) using pandas `isna`, `dropna`, and `fillna`\n", + "* Applying transformations to `DataFrame`:\n", + " * 
Use `apply` on pandas `Series` to apply a transformation function\n", + " * Use `replace` to replace all target values in pandas `Series` and `DataFrame` rows / columns\n", + "* Filter, aggregate, group, and summarize information in a `DataFrame` with `groupby`\n", + "* Convert `groupby` examples to SQL\n", + "* Solving the same question using SQL and pandas `DataFrame` manipulations:\n", + " * filtering, grouping, and aggregation / summarization" ] }, { @@ -200,7 +187,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### WARMUP 3: Use pandas lookup expression to identify the column names and the types: use .iloc" + "### WARMUP 3: Use a pandas lookup expression to extract the \"sql\" column and display the full query using `.iloc`" ] }, { @@ -2172,7 +2159,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### `lambda` recap\n", + "### `lambda`\n", "\n", "Let's write a `lambda` function instead of the `replace_mode` function" ] diff --git a/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation_template.ipynb b/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation_template.ipynb index ebd46ae0cf3c5711b469ead673842a244dd42ce8..40be2272f22cc70354345435a6440dfc3f389fe7 100644 --- a/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation_template.ipynb +++ b/f22/meena_lec_notes/lec-35/lec_35_pandas3_data_transformation_template.ipynb @@ -25,21 +25,7 @@ "source": [ "# Lecture 35 Pandas 3: Data Transformation\n", "* Data transformation is the process of changing the format, structure, or values of data. 
\n", - "* Often needed during data cleaning and sometimes during data analysis\n", - "\n", - "Possible data transformation: \n", - "* Parsing/Extraction\n", - " * Parse CSV to Pandas DataFrame\n", - "* Missing Value Manipulations\n", - " * Dropping\n", - " * Imputation: replace missing value with substitute values\n", - "* Typecasting, Formating, Renaming\n", - " * Typecasting: covert one column from int to float \n", - " * Formating: format the time column to datatime object \n", - " * Renaming: rename column and index names \n", - "* Applying/Mapping \n", - "* Filtering, Aggregation, Grouping, and Summarization\n", - " * Covered in Pandas 1 & 2 lectures" + "* Often needed during data cleaning and sometimes during data analysis" ] }, { @@ -50,14 +36,15 @@ "source": [ "# Today's Learning Objectives: \n", "\n", - "* Missing Value Manipulations\n", - " * check, drop, and fill NaN using Pandas .isna, .dropna, and .fillna\n", - "* Applying/Mapping\n", - " * Use .apply on Pandas Series and DataFrame rows/columns \n", - " * Use .replace to replace all target values \n", - "* Filtering, Aggregation, Grouping, and Summarization\n", - " * More .groupby examples \n", - " * Convert .groupby examples to SQL " + "* Setting a column as the index of a pandas `DataFrame`\n", + "* Identify, drop, or fill missing values (`np.nan`) using pandas `isna`, `dropna`, and `fillna`\n", + "* Applying transformations to `DataFrame`:\n", + " * Use `apply` on pandas `Series` to apply a transformation function\n", + " * Use `replace` to replace all target values in pandas `Series` and `DataFrame` rows / columns\n", + "* Filter, aggregate, group, and summarize information in a `DataFrame` with `groupby`\n", + "* Convert `groupby` examples to SQL\n", + "* Solving the same question using SQL and pandas `DataFrame` manipulations:\n", + " * filtering, grouping, and aggregation / summarization" ] }, { @@ -136,7 +123,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### WARMUP 3: Use pandas lookup 
expression to identify the column names and the types: use .iloc" + "### WARMUP 3: Use a pandas lookup expression to extract the \"sql\" column and display the full query using `.iloc`" ] }, { @@ -372,7 +359,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### `lambda` recap\n", + "### `lambda`\n", "\n", "Let's write a `lambda` function instead of the `replace_mode` function" ]