Commit c02076c · 1 Parent(s): 96e2e87
deagar committed

fixed paths on parquet write section

notebooks/assesment.ipynb CHANGED
@@ -256,10 +256,10 @@
   "source": [
   "# 7. Writing to Parquet\n",
   "# =====================\n",
-  "# We'll write the cleaned Spark DataFrame to a Parquet file (e.g. \"titanic_merged_clean.parquet\").\n",
+  "# We'll write the cleaned Spark DataFrame to a Parquet file (e.g. \"../titanic_merged_clean.parquet\").\n",
   "\n",
   "# 7.1 TODO: Write spark_merged_clean to Parquet\n",
-  "# e.g., spark_merged_clean.write.mode(\"overwrite\").parquet(\"titanic_merged_clean.parquet\")\n",
+  "# e.g., spark_merged_clean.write. ...\n",
   "\n",
   "# 7.2 TODO: Read it back into a new Spark DataFrame called 'spark_parquet_df'\n",
   "# spark_parquet_df = ?\n",
notebooks/solutions.ipynb CHANGED
@@ -221,7 +221,7 @@
   "source": [
   "#Write spark df to parquet\n",
   "\n",
-  "spark_merged_clean.write.mode(\"overwrite\").parquet(\"titanic_merged_clean.parquet\")"
+  "spark_merged_clean.write.mode(\"overwrite\").parquet(\"../titanic_merged_clean.parquet\")"
   ]
   },
   {
@@ -232,7 +232,7 @@
   "source": [
   "#Read parquet back in\n",
   "\n",
-  "spark_parquet_df = spark.read.parquet(\"titanic_merged_clean.parquet\")\n",
+  "spark_parquet_df = spark.read.parquet(\"../titanic_merged_clean.parquet\")\n",
   "print(\"spark_parquet_df count:\", spark_parquet_df.count())\n",
   "spark_parquet_df.show(5)\n"
   ]
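The `../` prefix appears intended to place the Parquet output at the repository root rather than inside notebooks/, assuming the notebooks run with notebooks/ as the working directory. Below is a minimal, self-contained sketch of the same round-trip; the SparkSession setup and the sample rows are illustrative assumptions, since in the notebooks `spark` and `spark_merged_clean` already exist:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("parquet-roundtrip").getOrCreate()

# Hypothetical stand-in for the cleaned, merged Titanic DataFrame.
spark_merged_clean = spark.createDataFrame(
    [(1, "Braund, Mr. Owen Harris", 0), (2, "Cumings, Mrs. John Bradley", 1)],
    ["PassengerId", "Name", "Survived"],
)

# Write as in the patched cells; mode("overwrite") replaces any earlier output.
spark_merged_clean.write.mode("overwrite").parquet("../titanic_merged_clean.parquet")

# Read the Parquet data back and verify the count and contents.
spark_parquet_df = spark.read.parquet("../titanic_merged_clean.parquet")
print("spark_parquet_df count:", spark_parquet_df.count())
spark_parquet_df.show(5)
```

Note that Spark writes Parquet as a directory of part files, so `titanic_merged_clean.parquet` ends up as a folder despite the file-like name.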