dplyr cheatsheet

2017-02-13 18:05:54 +05:30 · 2017-02-13 18:05:54 +05:30 · 82cc1ced34
parent e48b05318d
commit 82cc1ced34
1 changed files with 200 additions and 0 deletions
--- a/share/goodie/cheat_sheets/json/dplyr.json
+++ b/share/goodie/cheat_sheets/json/dplyr.json
@@ -0,0 +1,200 @@
+{
+   "id" : "dplyr_cheat_sheet",
+   "name" : "Dplyr Cheat Sheet",
+   "description" : "Dplyr R basic usage",
+   "metadata" : {
+      "sourceName" : "RStudio",
+      "sourceUrl" : "https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf"
+   },
+   "aliases": [
+       "dplyr", "data wrangling in R", "data cleaning in R", "data munging in R"
+   ],
+   "template_type": "terminal",
+   "section_order" : [
+      "Basics",
+      "Reshaping Data",
+      "Subset Observations (Row)",
+      "Subset Variables (Columns)",
+      "Summarize Data",
+      "Make New Variables",
+      "Combine Data sets",
+      "Group Data"
+   ],
+   "sections" : {
+      "Basics" : [
+         {
+            "key" : "dplyr::tbl_df(iris)",
+            "val" : "Converts data to tbl class. tbl's are easier to examine than data frames."
+         },
+         {
+            "key" : "dplyr::glimpse(iris)",
+            "val" : "Information dense summary of tbl data."
+         },
+         {
+            "key" : "dplyr::%>%",
+            "val" : "Passes object on left-hand side as first argument of function on right-hand side. x %>% f(y) is the same as f(x, y)"
+         }
+      ],
+      "Reshaping Data" : [
+         {
+            "key" : "dplyr::data_frame(a = 1:3, b = 4:6)",
+            "val" : "Combine vectors into data frame (optimized)."
+
+         },
+         {
+            "key" : "dplyr::arrange(dataframe, variableName)",
+            "val" : "Order rows in dataframe by values of variableName (low to high)."
+
+         },
+         {
+            "key" : "dplyr::arrange(dataframe, desc(variableName))",
+            "val" : "Order rows in dataframe by values of variableName (high to low)."
+         },
+         {
+            "key" : "dplyr::rename(dataframe, newname1 = oldname1, newname2 = oldname2)",
+            "val" : "Rename the columns of a dataframe"
+         }
+      ],
+      "Subset Observations (Row)" : [
+         {
+            "key" : "dplyr::filter(iris, Sepal.Length > 7)",
+            "val" : "Extract rows that meet logical criteria."
+         },
+         {
+            "key" : "dplyr::distinct(iris)",
+            "val" : "Remove duplicate rows."
+         },
+         {
+            "key" : "dplyr::sample_frac(iris, 0.5, replace = TRUE)",
+            "val" : "Randomly select fraction of rows."
+         },
+         {
+            "key" : "dplyr::sample_n(iris, 10, replace = TRUE)",
+            "val" : "Randomly select n rows."
+         },
+         {
+            "key" : "dplyr::slice(iris, 10:15)",
+            "val" : "Select rows by position."
+         },
+         {
+            "key" : "dplyr::top_n(storms, 2, date)",
+            "val" : "Select and order top n entries (by group if grouped data)."
+         }
+      ],
+      "Subset Variables (Columns)" : [
+         {
+            "key" : "dplyr::select(iris, Sepal.Width, Petal.Length, Species)",
+            "val" : "Select columns by name."
+         },
+         {
+            "key" : "dplyr::select(iris, contains('.'))",
+            "val" : "Select columns whose name contains a character string."
+         },
+         {
+            "key" : "dplyr::select(iris, starts_with('Sepal'))",
+            "val" : "Select columns whose name starts with a character string."
+         },
+         {
+            "key" : "dplyr::select(iris, ends_with('Length'))",
+            "val" : "Select columns whose name ends with a character string."
+         },
+         {
+            "key" : "dplyr::select(iris, num_range('x', 1:5))",
+            "val" : "Select columns named x1, x2, x3, x4, x5."
+         },
+         {
+            "key" : "dplyr::select(iris, one_of(c('Species', 'Genus')))",
+            "val" : "Select columns whose names are in a group of names."
+         },
+         {
+            "key" : "dplyr::select(iris, -Species)",
+            "val" : "Select all columns except Species."
+         }
+      ],
+      "Summarize Data" : [
+         {
+            "key" : "dplyr::summarise(iris, avg = mean(Sepal.Length))",
+            "val" : "Summarise data into single row of values."
+         },
+         {
+            "key" : "dplyr::summarise_each(iris, funs(mean))",
+            "val" : "Apply summary function (funs) to each column."
+         },
+         {
+            "key" : "dplyr::count(iris, Species, wt = Sepal.Length)",
+            "val" : "Count number of rows with each unique value of variable (with or without weights)."
+         }
+      ],
+      "Make New Variables" : [
+         {
+            "key" : "dplyr::mutate(iris, sepal = Sepal.Length + Sepal.Width)",
+            "val" : "Compute and append one or more new columns."
+         },
+         {
+            "key" : "dplyr::mutate_each(iris, funs(min_rank))",
+            "val" : "Apply window function to each column."
+         },
+         {
+            "key" : "dplyr::transmute(iris, sepal = Sepal.Length + Sepal.Width)",
+            "val" : "Compute one or more new columns. Drop original columns."
+         }
+      ],
+      "Combine Data sets" : [
+         {
+            "key" : "dplyr::left_join(a, b, by = 'x1')",
+            "val" : "Join matching rows from b to a."
+         },
+         {
+            "key" : "dplyr::right_join(a, b, by = 'x1')",
+            "val" : "Join matching rows from a to b."
+         },
+         {
+            "key" : "dplyr::inner_join(a, b, by = 'x1')",
+            "val" : "Join data. Retain only rows in both sets."
+         },
+         {
+            "key" : "dplyr::full_join(a, b, by = 'x1')",
+            "val" : "Join data. Retain all values, all rows."
+         },
+         {
+            "key" : "dplyr::semi_join(a, b, by = 'x1')",
+            "val" : "All rows in a that have a match in b."
+         },
+         {
+            "key" : "dplyr::anti_join(a, b, by = 'x1')",
+            "val" : "All rows in a that do not have a match in b."
+         },
+         {
+            "key" : "dplyr::intersect(y, z)",
+            "val" : "Rows that appear in both y and z."
+         },
+         {
+            "key" : "dplyr::union(y, z)",
+            "val" : "Rows that appear in either or both y and z."
+         },
+         {
+            "key" : "dplyr::setdiff(y, z)",
+            "val" : "Rows that appear in y but not z."
+         },
+         {
+            "key" : "dplyr::bind_rows(y, z)",
+            "val" : "Append z to y as new rows."
+         },
+         {
+            "key" : "dplyr::bind_cols(y, z)",
+            "val" : "Append z to y as new columns. Caution: matches rows by position."
+         }
+      ],
+      "Group Data" : [
+         {
+            "key" : "dplyr::group_by(iris, Species)",
+            "val" : "Group data into rows with the same value of Species."
+         },
+         {
+            "key" : "dplyr::ungroup(iris)",
+            "val" : "Remove grouping information from data frame."
+         }
+      ]
+
+   }
+}