Spaces:
Sleeping
Sleeping
Fixed paths in the Parquet write section
Browse files
- notebooks/assesment.ipynb +2 -2
- notebooks/solutions.ipynb +2 -2
notebooks/assesment.ipynb
CHANGED
@@ -256,10 +256,10 @@
|
|
256 |
"source": [
|
257 |
"# 7. Writing to Parquet\n",
|
258 |
"# =====================\n",
|
259 |
-
"# We'll write the cleaned Spark DataFrame to a Parquet file (e.g. \"titanic_merged_clean.parquet\").\n",
|
260 |
"\n",
|
261 |
"# 7.1 TODO: Write spark_merged_clean to Parquet\n",
|
262 |
-
"# e.g., spark_merged_clean.write.
|
263 |
"\n",
|
264 |
"# 7.2 TODO: Read it back into a new Spark DataFrame called 'spark_parquet_df'\n",
|
265 |
"# spark_parquet_df = ?\n",
|
|
|
256 |
"source": [
|
257 |
"# 7. Writing to Parquet\n",
|
258 |
"# =====================\n",
|
259 |
+
"# We'll write the cleaned Spark DataFrame to a Parquet file (e.g. \"../titanic_merged_clean.parquet\").\n",
|
260 |
"\n",
|
261 |
"# 7.1 TODO: Write spark_merged_clean to Parquet\n",
|
262 |
+
"# e.g., spark_merged_clean.write. ...\n",
|
263 |
"\n",
|
264 |
"# 7.2 TODO: Read it back into a new Spark DataFrame called 'spark_parquet_df'\n",
|
265 |
"# spark_parquet_df = ?\n",
|
notebooks/solutions.ipynb
CHANGED
@@ -221,7 +221,7 @@
|
|
221 |
"source": [
|
222 |
"#Write spark df to parquet\n",
|
223 |
"\n",
|
224 |
-
"spark_merged_clean.write.mode(\"overwrite\").parquet(\"titanic_merged_clean.parquet\")"
|
225 |
]
|
226 |
},
|
227 |
{
|
@@ -232,7 +232,7 @@
|
|
232 |
"source": [
|
233 |
"#Read parquet back in\n",
|
234 |
"\n",
|
235 |
-
"spark_parquet_df = spark.read.parquet(\"titanic_merged_clean.parquet\")\n",
|
236 |
"print(\"spark_parquet_df count:\", spark_parquet_df.count())\n",
|
237 |
"spark_parquet_df.show(5)\n"
|
238 |
]
|
|
|
221 |
"source": [
|
222 |
"#Write spark df to parquet\n",
|
223 |
"\n",
|
224 |
+
"spark_merged_clean.write.mode(\"overwrite\").parquet(\"../titanic_merged_clean.parquet\")"
|
225 |
]
|
226 |
},
|
227 |
{
|
|
|
232 |
"source": [
|
233 |
"#Read parquet back in\n",
|
234 |
"\n",
|
235 |
+
"spark_parquet_df = spark.read.parquet(\"../titanic_merged_clean.parquet\")\n",
|
236 |
"print(\"spark_parquet_df count:\", spark_parquet_df.count())\n",
|
237 |
"spark_parquet_df.show(5)\n"
|
238 |
]
|