{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1ed33b62-f09e-44e5-84c2-9b7be07998be",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lec1.ipynb  lec2.ipynb\n"
     ]
    }
   ],
   "source": [
    "! ls"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c8a9067-a138-43b9-ae21-e6bc6eb5a133",
   "metadata": {},
   "source": [
    "# Command Line Examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "11602a88-79ab-4d51-8def-ab1b3bca6a31",
   "metadata": {},
   "outputs": [],
   "source": [
    "!hdfs dfs -mkdir hdfs://main:9000/data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "59dca956-2612-49b6-a94c-92dc1dbe4df7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 1 items\n",
      "drwxr-xr-x   - root supergroup          0 2025-03-05 17:12 hdfs://main:9000/data\n"
     ]
    }
   ],
   "source": [
    "!hdfs dfs -ls hdfs://main:9000/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9946811e-f9bb-4c50-a39a-2ebddb543ffa",
   "metadata": {},
   "outputs": [],
   "source": [
    "!hdfs dfs -cp /hadoop-3.3.6/LICENSE.txt hdfs://main:9000/data/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9faef755-777b-47f2-91e3-83366d97d552",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 1 items\n",
      "-rw-r--r--   3 root supergroup      15217 2025-03-05 17:13 hdfs://main:9000/data/LICENSE.txt\n"
     ]
    }
   ],
   "source": [
    "!hdfs dfs -ls hdfs://main:9000/data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "64807b1e-f323-4271-bf19-7a3aed50cc86",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "                                 Apache License\n",
      "                           Version 2.0, January 2004\n",
      "                        http://www.apache.org/licenses/\n",
      "\n",
      "   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n",
      "\n",
      "   1. Definitions.\n",
      "\n",
      "      \"License\" shall mean the terms and conditions for use, reproduction,\n",
      "      and distribution as defined by Sections 1 through 9 of this document.\n",
      "\n",
      "      \"Licensor\" shall mean the copyright owner or entity authorized by\n",
      "      the copyright owner that is granting the License.\n",
      "\n",
      "      \"Legal Entity\" shall mean the union of the acting entity and all\n",
      "      other entities that control, are controlled by, or are under common\n",
      "      control with that entity. For the purposes of this definition,\n",
      "      \"control\" means (i) the power, direct or indirect, to cause the\n",
      "      direction or management of such entity, whether by contract or\n",
      "      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n",
      "      outstanding shares, or (iii) beneficial ownership of such entity.\n",
      "\n",
      "      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n",
      "      exercising permissions granted by this License.\n",
      "\n",
      "      \"Source\" form shall mean the preferred form for making modifications,\n",
      "      including but not limited to software source code, documentation\n",
      "      source, and configuration files.\n",
      "\n",
      "      \"Object\" form shall mean any form resulting from mechanical\n",
      "      transformation or translation of a Source form, including but\n",
      "      not limited to compiled object code, generated documentation,\n",
      "      and conversions to other media types.\n",
      "\n",
      "      \"Work\" shall mean the work of authorship, whether in Source or\n",
      "      Object form, made available under the License, as indicated by a\n",
      "      copyright notice that is included in or attached to the work\n",
      "      (an example is provided in the Appendix below).\n",
      "\n",
      "      \"Derivative Works\" shall mean any work, whether in Source or Object\n",
      "      form, that is based on (or derived from) the Work and for which the\n",
      "      editorial revisions, annotations, elaborations, or other modifications\n",
      "      represent, as a whole, an original work of authorship. For the purposes\n",
      "      of this License, Derivative Works shall not include works that remain\n",
      "      separable from, or merely link (or bind by name) to the interfaces of,\n",
      "      the Work and Derivative Works thereof.\n",
      "\n",
      "      \"Contribution\" shall mean any work of authorship, including\n",
      "      the original version of the Work and any modifications or additions\n",
      "      to that Work or Derivative Works thereof, that is intentionally\n",
      "      submitted to Licensor for inclusion in the Work by the copyright owner\n",
      "      or by an individual or Legal Entity authorized to submit on behalf of\n",
      "      the copyright owner. For the purposes of this definition, \"submitted\"\n",
      "      means any form of electronic, verbal, or written communication sent\n",
      "      to the Licensor or its representatives, including but not limited to\n",
      "      communication on electronic mailing lists, source code control systems,\n",
      "      and issue tracking systems that are managed by, or on behalf of, the\n",
      "      Licensor for the purpose of discussing and improving the Work, but\n",
      "      excluding communication that is conspicuously marked or otherwise\n",
      "      designated in writing by the copyright owner as \"Not a Contribution.\"\n",
      "\n",
      "      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n",
      "      on behalf of whom a Contribution has been received by Licensor and\n",
      "      subsequently incorporated within the Work.\n",
      "\n",
      "   2. Grant of Copyright License. Subject to the terms and conditions of\n",
      "      this License, each Contributor hereby grants to You a perpetual,\n",
      "      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n",
      "      copyright license to reproduce, prepare Derivative Works of,\n",
      "      publicly display, publicly perform, sublicense, and distribute the\n",
      "      Work and such Derivative Works in Source or Object form.\n",
      "\n",
      "   3. Grant of Patent License. Subject to the terms and conditions of\n",
      "      this License, each Contributor hereby grants to You a perpetual,\n",
      "      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n",
      "      (except as stated in this section) patent license to make, have made,\n",
      "      use, offer to sell, sell, import, and otherwise transfer the Work,\n",
      "      where such license applies only to those patent claims licensable\n",
      "      by such Contributor that are necessarily infringed by their\n",
      "      Contribution(s) alone or by combination of their Contribution(s)\n",
      "      with the Work to which such Contribution(s) was submitted. If You\n",
      "      institute patent litigation against any entity (including a\n",
      "      cross-claim or counterclaim in a lawsuit) alleging that the Work\n",
      "      or a Contribution incorporated within the Work constitutes direct\n",
      "      or contributory patent infringement, then any patent licenses\n",
      "      granted to You under this License for that Work shall terminate\n",
      "      as of the date such litigation is filed.\n",
      "\n",
      "   4. Redistribution. You may reproduce and distribute copies of the\n",
      "      Work or Derivative Works thereof in any medium, with or without\n",
      "      modifications, and in Source or Object form, provided that You\n",
      "      meet the following conditions:\n",
      "\n",
      "      (a) You must give any other recipients of the Work or\n",
      "          Derivative Works a copy of this License; and\n",
      "\n",
      "      (b) You must cause any modified files to carry prominent notices\n",
      "          stating that You changed the files; and\n",
      "\n",
      "      (c) You must retain, in the Source form of any Derivative Works\n",
      "          that You distribute, all copyright, patent, trademark, and\n",
      "          attribution notices from the Source form of the Work,\n",
      "          excluding those notices that do not pertain to any part of\n",
      "          the Derivative Works; and\n",
      "\n",
      "      (d) If the Work includes a \"NOTICE\" text file as part of its\n",
      "          distribution, then any Derivative Works that You distribute must\n",
      "          include a readable copy of the attribution notices contained\n",
      "          within such NOTICE file, excluding those notices that do not\n",
      "          pertain to any part of the Derivative Works, in at least one\n",
      "          of the following places: within a NOTICE text file distributed\n",
      "          as part of the Derivative Works; within the Source form or\n",
      "          documentation, if provided along with the Derivative Works; or,\n",
      "          within a display generated by the Derivative Works, if and\n",
      "          wherever such third-party notices normally appear. The contents\n",
      "          of the NOTICE file are for informational purposes only and\n",
      "          do not modify the License. You may add Your own attribution\n",
      "          notices within Derivative Works that You distribute, alongside\n",
      "          or as an addendum to the NOTICE text from the Work, provided\n",
      "          that such additional attribution notices cannot be construed\n",
      "          as modifying the License.\n",
      "\n",
      "      You may add Your own copyright statement to Your modifications and\n",
      "      may provide additional or different license terms and conditions\n",
      "      for use, reproduction, or distribution of Your modifications, or\n",
      "      for any such Derivative Works as a whole, provided Your use,\n",
      "      reproduction, and distribution of the Work otherwise complies with\n",
      "      the conditions stated in this License.\n",
      "\n",
      "   5. Submission of Contributions. Unless You explicitly state otherwise,\n",
      "      any Contribution intentionally submitted for inclusion in the Work\n",
      "      by You to the Licensor shall be under the terms and conditions of\n",
      "      this License, without any additional terms or conditions.\n",
      "      Notwithstanding the above, nothing herein shall supersede or modify\n",
      "      the terms of any separate license agreement you may have executed\n",
      "      with Licensor regarding such Contributions.\n",
      "\n",
      "   6. Trademarks. This License does not grant permission to use the trade\n",
      "      names, trademarks, service marks, or product names of the Licensor,\n",
      "      except as required for reasonable and customary use in describing the\n",
      "      origin of the Work and reproducing the content of the NOTICE file.\n",
      "\n",
      "   7. Disclaimer of Warranty. Unless required by applicable law or\n",
      "      agreed to in writing, Licensor provides the Work (and each\n",
      "      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n",
      "      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n",
      "      implied, including, without limitation, any warranties or conditions\n",
      "      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n",
      "      PARTICULAR PURPOSE. You are solely responsible for determining the\n",
      "      appropriateness of using or redistributing the Work and assume any\n",
      "      risks associated with Your exercise of permissions under this License.\n",
      "\n",
      "   8. Limitation of Liability. In no event and under no legal theory,\n",
      "      whether in tort (including negligence), contract, or otherwise,\n",
      "      unless required by applicable law (such as deliberate and grossly\n",
      "      negligent acts) or agreed to in writing, shall any Contributor be\n",
      "      liable to You for damages, including any direct, indirect, special,\n",
      "      incidental, or consequential damages of any character arising as a\n",
      "      result of this License or out of the use or inability to use the\n",
      "      Work (including but not limited to damages for loss of goodwill,\n",
      "      work stoppage, computer failure or malfunction, or any and all\n",
      "      other commercial damages or losses), even if such Contributor\n",
      "      has been advised of the possibility of such damages.\n",
      "\n",
      "   9. Accepting Warranty or Additional Liability. While redistributing\n",
      "      the Work or Derivative Works thereof, You may choose to offer,\n",
      "      and charge a fee for, acceptance of support, warranty, indemnity,\n",
      "      or other liability obligations and/or rights consistent with this\n",
      "      License. However, in accepting such obligations, You may act only\n",
      "      on Your own behalf and on Your sole responsibility, not on behalf\n",
      "      of any other Contributor, and only if You agree to indemnify,\n",
      "      defend, and hold each Contributor harmless for any liability\n",
      "      incurred by, or claims asserted against, such Contributor by reason\n",
      "      of your accepting any such warranty or additional liability.\n",
      "\n",
      "   END OF TERMS AND CONDITIONS\n",
      "\n",
      "   APPENDIX: How to apply the Apache License to your work.\n",
      "\n",
      "      To apply the Apache License to your work, attach the following\n",
      "      boilerplate notice, with the fields enclosed by brackets \"[]\"\n",
      "      replaced with your own identifying information. (Don't include\n",
      "      the brackets!)  The text should be enclosed in the appropriate\n",
      "      comment syntax for the file format. We also recommend that a\n",
      "      file or class name and description of purpose be included on the\n",
      "      same \"printed page\" as the copyright notice for easier\n",
      "      identification within third-party archives.\n",
      "\n",
      "   Copyright [yyyy] [name of copyright owner]\n",
      "\n",
      "   Licensed under the Apache License, Version 2.0 (the \"License\");\n",
      "   you may not use this file except in compliance with the License.\n",
      "   You may obtain a copy of the License at\n",
      "\n",
      "       http://www.apache.org/licenses/LICENSE-2.0\n",
      "\n",
      "   Unless required by applicable law or agreed to in writing, software\n",
      "   distributed under the License is distributed on an \"AS IS\" BASIS,\n",
      "   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
      "   See the License for the specific language governing permissions and\n",
      "   limitations under the License.\n",
      "\n",
      "--------------------------------------------------------------------------------\n",
      "This product bundles various third-party components under other open source\n",
      "licenses. This section summarizes those components and their licenses.\n",
      "See licenses/ for text of these licenses.\n",
      "\n",
      "\n",
      "Apache Software Foundation License 2.0\n",
      "--------------------------------------\n",
      "\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java\n",
      "\n",
      "\n",
      "BSD 2-Clause\n",
      "------------\n",
      "\n",
      "hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.{c|h}\n",
      "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h\n",
      "\n",
      "\n",
      "BSD 3-Clause\n",
      "------------\n",
      "\n",
      "hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/*\n",
      "hadoop-common-project/hadoop-common/src/main/native/gtest/gtest-all.cc\n",
      "hadoop-common-project/hadoop-common/src/main/native/gtest/include/gtest/gtest.h\n",
      "hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_x86.c\n",
      "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/protobuf/protobuf/cpp_helpers.h\n",
      "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/gmock-1.7.0/*/*.{cc|h}\n",
      "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-v4.1.1.min.js\n",
      "\n",
      "\n",
      "MIT License\n",
      "-----------\n",
      "\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/bootstrap-3.4.1\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js\n",
      "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js\n",
      "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js\n",
      "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css\n",
      "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/node_modules/.bin/r.js\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/*\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL\n",
      "\n",
      "uriparser2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2)\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/cJSON.[ch]\n",
      "\n",
      "Boost Software License, Version 1.0\n",
      "-------------\n",
      "asio-1.10.2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/asio-1.10.2)\n",
      "rapidxml-1.13 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/rapidxml-1.13)\n",
      "tr2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/tr2)\n",
      "\n",
      "Public Domain\n",
      "-------------\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/json-bignum.js\n"
     ]
    }
   ],
   "source": [
    "!hdfs dfs -cat hdfs://main:9000/data/LICENSE.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "7649de66-99fc-468f-b3bb-d53ac6104651",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14.9 K  44.6 K  hdfs://main:9000/data/LICENSE.txt\n"
     ]
    }
   ],
   "source": [
    "!hdfs dfs -du -h hdfs://main:9000/data/LICENSE.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "077c4587-942a-40d8-80be-9c663ee10c3f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connecting to namenode via http://main:9870/fsck?ugi=root&path=%2Fdata%2FLICENSE.txt\n",
      "FSCK started by root (auth:SIMPLE) from /172.18.0.3 for path /data/LICENSE.txt at Wed Mar 05 17:16:04 GMT 2025\n",
      "\n",
      "\n",
      "/data/LICENSE.txt:  Under replicated BP-478178705-172.18.0.2-1741194447522:blk_1073741825_1001. Target Replicas is 3 but found 1 live replica(s), 0 decommissioned replica(s), 0 decommissioning replica(s).\n",
      "\n",
      "Status: HEALTHY\n",
      " Number of data-nodes:\t1\n",
      " Number of racks:\t\t1\n",
      " Total dirs:\t\t\t0\n",
      " Total symlinks:\t\t0\n",
      "\n",
      "Replicated Blocks:\n",
      " Total size:\t15217 B\n",
      " Total files:\t1\n",
      " Total blocks (validated):\t1 (avg. block size 15217 B)\n",
      " Minimally replicated blocks:\t1 (100.0 %)\n",
      " Over-replicated blocks:\t0 (0.0 %)\n",
      " Under-replicated blocks:\t1 (100.0 %)\n",
      " Mis-replicated blocks:\t\t0 (0.0 %)\n",
      " Default replication factor:\t3\n",
      " Average block replication:\t1.0\n",
      " Missing blocks:\t\t0\n",
      " Corrupt blocks:\t\t0\n",
      " Missing replicas:\t\t2 (66.666664 %)\n",
      " Blocks queued for replication:\t0\n",
      "\n",
      "Erasure Coded Block Groups:\n",
      " Total size:\t0 B\n",
      " Total files:\t0\n",
      " Total block groups (validated):\t0\n",
      " Minimally erasure-coded block groups:\t0\n",
      " Over-erasure-coded block groups:\t0\n",
      " Under-erasure-coded block groups:\t0\n",
      " Unsatisfactory placement block groups:\t0\n",
      " Average block group size:\t0.0\n",
      " Missing block groups:\t\t0\n",
      " Corrupt block groups:\t\t0\n",
      " Missing internal blocks:\t0\n",
      " Blocks queued for replication:\t0\n",
      "FSCK ended at Wed Mar 05 17:16:04 GMT 2025 in 13 milliseconds\n",
      "\n",
      "\n",
      "The filesystem under path '/data/LICENSE.txt' is HEALTHY\n"
     ]
    }
   ],
   "source": [
    "!hdfs fsck hdfs://main:9000/data/LICENSE.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e2c9c022-158b-4181-bd4c-fa451ba6c783",
   "metadata": {},
   "outputs": [],
   "source": [
    "!hdfs dfs -D dfs.replication=1 -cp /hadoop-3.3.6/LICENSE.txt hdfs://main:9000/data/test.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "e0d4797c-8bde-4588-96af-3ad1134dcb2b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connecting to namenode via http://main:9870/fsck?ugi=root&path=%2Fdata%2Ftest.txt\n",
      "FSCK started by root (auth:SIMPLE) from /172.18.0.3 for path /data/test.txt at Wed Mar 05 17:18:35 GMT 2025\n",
      "\n",
      "\n",
      "Status: HEALTHY\n",
      " Number of data-nodes:\t1\n",
      " Number of racks:\t\t1\n",
      " Total dirs:\t\t\t0\n",
      " Total symlinks:\t\t0\n",
      "\n",
      "Replicated Blocks:\n",
      " Total size:\t15217 B\n",
      " Total files:\t1\n",
      " Total blocks (validated):\t1 (avg. block size 15217 B)\n",
      " Minimally replicated blocks:\t1 (100.0 %)\n",
      " Over-replicated blocks:\t0 (0.0 %)\n",
      " Under-replicated blocks:\t0 (0.0 %)\n",
      " Mis-replicated blocks:\t\t0 (0.0 %)\n",
      " Default replication factor:\t3\n",
      " Average block replication:\t1.0\n",
      " Missing blocks:\t\t0\n",
      " Corrupt blocks:\t\t0\n",
      " Missing replicas:\t\t0 (0.0 %)\n",
      " Blocks queued for replication:\t0\n",
      "\n",
      "Erasure Coded Block Groups:\n",
      " Total size:\t0 B\n",
      " Total files:\t0\n",
      " Total block groups (validated):\t0\n",
      " Minimally erasure-coded block groups:\t0\n",
      " Over-erasure-coded block groups:\t0\n",
      " Under-erasure-coded block groups:\t0\n",
      " Unsatisfactory placement block groups:\t0\n",
      " Average block group size:\t0.0\n",
      " Missing block groups:\t\t0\n",
      " Corrupt block groups:\t\t0\n",
      " Missing internal blocks:\t0\n",
      " Blocks queued for replication:\t0\n",
      "FSCK ended at Wed Mar 05 17:18:35 GMT 2025 in 2 milliseconds\n",
      "\n",
      "\n",
      "The filesystem under path '/data/test.txt' is HEALTHY\n"
     ]
    }
   ],
   "source": [
    "!hdfs fsck hdfs://main:9000/data/test.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50efdb28-cfdb-4172-8904-7718fce705fc",
   "metadata": {},
   "source": [
    "# WebHDFS Examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ff78a66f-9300-463d-a508-677231832932",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "HTTP/1.1 200 OK\n",
      "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:22:12 GMT\n",
      "\u001b[1mCache-Control\u001b[0m: no-cache\n",
      "\u001b[1mExpires\u001b[0m: Wed, 05 Mar 2025 17:22:12 GMT\n",
      "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:22:12 GMT\n",
      "\u001b[1mPragma\u001b[0m: no-cache\n",
      "\u001b[1mX-Content-Type-Options\u001b[0m: nosniff\n",
      "\u001b[1mX-FRAME-OPTIONS\u001b[0m: SAMEORIGIN\n",
      "\u001b[1mX-XSS-Protection\u001b[0m: 1; mode=block\n",
      "\u001b[1mContent-Type\u001b[0m: application/json\n",
      "\u001b[1mTransfer-Encoding\u001b[0m: chunked\n",
      "\n",
      "{\"FileStatuses\":{\"FileStatus\":[\n",
      "{\"accessTime\":1741194819485,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16387,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741194820075,\"owner\":\"root\",\"pathSuffix\":\"LICENSE.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\n",
      "{\"accessTime\":1741195034403,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16388,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195034487,\"owner\":\"root\",\"pathSuffix\":\"single.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\n",
      "{\"accessTime\":1741195111543,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16389,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195111626,\"owner\":\"root\",\"pathSuffix\":\"test.txt\",\"permission\":\"644\",\"replication\":1,\"storagePolicy\":0,\"type\":\"FILE\"}\n",
      "]}}\n"
     ]
    }
   ],
   "source": [
    "! curl -i  \"http://main:9870/webhdfs/v1/data?op=LISTSTATUS\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "3d957b70-a8e0-4147-838d-e4e9eb137ed4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# curl -i  \"http://main:9870/webhdfs/v1/data?op=LISTSTATUS\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "42dc77db-565e-4359-a52f-b869a642ad08",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b'{\"FileStatuses\":{\"FileStatus\":[\\n{\"accessTime\":1741194819485,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16387,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741194820075,\"owner\":\"root\",\"pathSuffix\":\"LICENSE.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195034403,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16388,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195034487,\"owner\":\"root\",\"pathSuffix\":\"single.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195111543,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16389,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195111626,\"owner\":\"root\",\"pathSuffix\":\"test.txt\",\"permission\":\"644\",\"replication\":1,\"storagePolicy\":0,\"type\":\"FILE\"}\\n]}}\\n'"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import requests\n",
    "r = requests.get(\"http://main:9870/webhdfs/v1/data?op=LISTSTATUS\")\n",
    "r.raise_for_status()\n",
    "r.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "6dc4b0f6-a691-4e19-8d5c-0715d7a46cd1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"FileStatuses\":{\"FileStatus\":[\\n{\"accessTime\":1741194819485,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16387,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741194820075,\"owner\":\"root\",\"pathSuffix\":\"LICENSE.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195034403,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16388,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195034487,\"owner\":\"root\",\"pathSuffix\":\"single.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195111543,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16389,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195111626,\"owner\":\"root\",\"pathSuffix\":\"test.txt\",\"permission\":\"644\",\"replication\":1,\"storagePolicy\":0,\"type\":\"FILE\"}\\n]}}\\n'"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "e04a4e12-6478-435c-a07e-dc7b024b0719",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LICENSE.txt\n",
      "single.txt\n",
      "test.txt\n"
     ]
    }
   ],
   "source": [
    "for entry in r.json()['FileStatuses']['FileStatus']:\n",
    "    print(entry[\"pathSuffix\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "5800cd5f-b93b-4c42-af89-5558075b1f1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# open+read a file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1addea41-bc58-40c7-93ca-52f0b6bc1f9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "#curl -i -L \"http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN\n",
    "#                    [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>][&noredirect=<true|false>]\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "a9602f15-a008-43b0-b76c-2ba0da2c3685",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "HTTP/1.1 307 Temporary Redirect\n",
      "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:31:15 GMT\n",
      "\u001b[1mCache-Control\u001b[0m: no-cache\n",
      "\u001b[1mExpires\u001b[0m: Wed, 05 Mar 2025 17:31:15 GMT\n",
      "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:31:15 GMT\n",
      "\u001b[1mPragma\u001b[0m: no-cache\n",
      "\u001b[1mX-Content-Type-Options\u001b[0m: nosniff\n",
      "\u001b[1mX-FRAME-OPTIONS\u001b[0m: SAMEORIGIN\n",
      "\u001b[1mX-XSS-Protection\u001b[0m: 1; mode=block\n",
      "\u001b[1mLocation\u001b[0m: \u001b]8;;http://main:9864/webhdfs/v1/data/test.txt?op=OPEN&namenoderpcaddress=main:9000&length=200&offset=0\u001b\\http://main:9864/webhdfs/v1/data/test.txt?op=OPEN&namenoderpcaddress=main:9000&length=200&offset=0\n",
      "\u001b]8;;\u001b\\\u001b[1mContent-Type\u001b[0m: application/octet-stream\n",
      "\u001b[1mContent-Length\u001b[0m: 0\n",
      "\n",
      "HTTP/1.1 200 OK\n",
      "\u001b[1mAccess-Control-Allow-Methods\u001b[0m: GET\n",
      "\u001b[1mAccess-Control-Allow-Origin\u001b[0m: *\n",
      "\u001b[1mContent-Type\u001b[0m: application/octet-stream\n",
      "\u001b[1mConnection\u001b[0m: close\n",
      "\u001b[1mContent-Length\u001b[0m: 200\n",
      "\n",
      "\n",
      "                                 Apache License\n",
      "                           Version 2.0, January 2004\n",
      "                        http://www.apache.org/licenses/\n",
      "\n",
      "   TERMS AND CONDITIONS FOR USE, REPRODUC"
     ]
    }
   ],
   "source": [
    "! curl -i -L \"http://main:9870/webhdfs/v1/data/test.txt?op=OPEN&offset=0&length=200\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "13543696-8fff-455d-9d90-d25210893b27",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "                                 Apache License\n",
      "                           Version 2.0, January 2004\n",
      "                        http://www.apache.org/licenses/\n",
      "\n",
      "   TERMS AND CONDITIONS FOR USE, REPRODUC\n"
     ]
    }
   ],
   "source": [
    "r = requests.get(\"http://main:9870/webhdfs/v1/data/test.txt?op=OPEN&offset=0&length=200\")\n",
    "r.raise_for_status()\n",
    "print(r.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "313a7b35-82ac-4221-86a8-8ebdf50a69bc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                             Apache License\n",
      "                           Version 2.0, January 2004\n",
      "                        http://www.apache.org/licenses/\n",
      "\n",
      "   TERMS AND CONDITIONS FOR USE, REPRODUCTION,\n"
     ]
    }
   ],
   "source": [
    "r = requests.get(\"http://main:9870/webhdfs/v1/data/test.txt?op=OPEN&offset=5&length=200\")\n",
    "r.raise_for_status()\n",
    "print(r.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "b8cbf271-28b5-4af8-b0a8-06d09b5993fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# where are all the DataNodes for all the blocks of a file?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "7ce73613-cc98-42f8-9c20-95eba1346240",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'BlockLocations': {'BlockLocation': [{'topologyPaths': ['/default-rack/172.18.0.2:9866'],\n",
       "    'corrupt': False,\n",
       "    'cachedHosts': [],\n",
       "    'names': ['172.18.0.2:9866'],\n",
       "    'offset': 0,\n",
       "    'hosts': ['main'],\n",
       "    'length': 15217,\n",
       "    'storageTypes': ['DISK']}]}}"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r = requests.get(\"http://main:9870/webhdfs/v1/data/test.txt?op=GETFILEBLOCKLOCATIONS\")\n",
    "r.raise_for_status()\n",
    "r.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "367e11ec-ef10-4a0a-b90d-ecbabc3dcb8d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DataNodes for this block: ['main']\n"
     ]
    }
   ],
   "source": [
    "for logical_block in r.json()['BlockLocations']['BlockLocation']:\n",
    "    print(\"DataNodes for this block:\", logical_block[\"hosts\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "28d9691f-0ef6-497d-8cd5-808c46b2844c",
   "metadata": {},
   "source": [
    "# PyArrow Examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "0e9e9a93-0943-44e3-b282-a978a3802d85",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyarrow as pa\n",
    "import pyarrow.fs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "f25a555d-93d0-4e01-8b37-bea11603ddfd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-03-05 17:37:01,372 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
     ]
    }
   ],
   "source": [
    "# other options: replication=????, default_block_size=????\n",
    "hdfs = pa.fs.HadoopFileSystem(\"main\", 9000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "fc8df378-1a0a-43cc-bf89-eada00bc3355",
   "metadata": {},
   "outputs": [],
   "source": [
    "from io import BufferedReader, TextIOWrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "4776be11-61af-46ca-9493-4485c97589d1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "                                 Apache License\n",
      "                           Version 2.0, January 2004\n",
      "                        http://www.apache.org/licenses/\n",
      "\n",
      "   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n",
      "\n",
      "   1. Definitions.\n",
      "\n",
      "      \"License\" shall mean the terms and conditions for use, reproduction,\n",
      "      and distribution as defined by Sections 1 through 9 of this document.\n",
      "\n",
      "      \"Licensor\" shall mean the copyright owner or entity authorized by\n",
      "      the copyright owner that is granting the License.\n",
      "\n",
      "      \"Legal Entity\" shall mean the union of the acting entity and all\n",
      "      other entities that control, are controlled by, or are under common\n",
      "      control with that entity. For the purposes of this definition,\n",
      "      \"control\" means (i) the power, direct or indirect, to cause the\n",
      "      direction or management of such entity, whether by contract or\n",
      "      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n",
      "      outstanding shares, or (iii) beneficial ownership of such entity.\n",
      "\n",
      "      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n",
      "      exercising permissions granted by this License.\n",
      "\n",
      "      \"Source\" form shall mean the preferred form for making modifications,\n",
      "      including but not limited to software source code, documentation\n",
      "      source, and configuration files.\n",
      "\n",
      "      \"Object\" form shall mean any form resulting from mechanical\n",
      "      transformation or translation of a Source form, including but\n",
      "      not limited to compiled object code, generated documentation,\n",
      "      and conversions to other media types.\n",
      "\n",
      "      \"Work\" shall mean the work of authorship, whether in Source or\n",
      "      Object form, made available under the License, as indicated by a\n",
      "      copyright notice that is included in or attached to the work\n",
      "      (an example is provided in the Appendix below).\n",
      "\n",
      "      \"Derivative Works\" shall mean any work, whether in Source or Object\n",
      "      form, that is based on (or derived from) the Work and for which the\n",
      "      editorial revisions, annotations, elaborations, or other modifications\n",
      "      represent, as a whole, an original work of authorship. For the purposes\n",
      "      of this License, Derivative Works shall not include works that remain\n",
      "      separable from, or merely link (or bind by name) to the interfaces of,\n",
      "      the Work and Derivative Works thereof.\n",
      "\n",
      "      \"Contribution\" shall mean any work of authorship, including\n",
      "      the original version of the Work and any modifications or additions\n",
      "      to that Work or Derivative Works thereof, that is intentionally\n",
      "      submitted to Licensor for inclusion in the Work by the copyright owner\n",
      "      or by an individual or Legal Entity authorized to submit on behalf of\n",
      "      the copyright owner. For the purposes of this definition, \"submitted\"\n",
      "      means any form of electronic, verbal, or written communication sent\n",
      "      to the Licensor or its representatives, including but not limited to\n",
      "      communication on electronic mailing lists, source code control systems,\n",
      "      and issue tracking systems that are managed by, or on behalf of, the\n",
      "      Licensor for the purpose of discussing and improving the Work, but\n",
      "      excluding communication that is conspicuously marked or otherwise\n",
      "      designated in writing by the copyright owner as \"Not a Contribution.\"\n",
      "\n",
      "      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n",
      "      on behalf of whom a Contribution has been received by Licensor and\n",
      "      subsequently incorporated within the Work.\n",
      "\n",
      "   2. Grant of Copyright License. Subject to the terms and conditions of\n",
      "      this License, each Contributor hereby grants to You a perpetual,\n",
      "      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n",
      "      copyright license to reproduce, prepare Derivative Works of,\n",
      "      publicly display, publicly perform, sublicense, and distribute the\n",
      "      Work and such Derivative Works in Source or Object form.\n",
      "\n",
      "   3. Grant of Patent License. Subject to the terms and conditions of\n",
      "      this License, each Contributor hereby grants to You a perpetual,\n",
      "      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n",
      "      (except as stated in this section) patent license to make, have made,\n",
      "      use, offer to sell, sell, import, and otherwise transfer the Work,\n",
      "      where such license applies only to those patent claims licensable\n",
      "      by such Contributor that are necessarily infringed by their\n",
      "      Contribution(s) alone or by combination of their Contribution(s)\n",
      "      with the Work to which such Contribution(s) was submitted. If You\n",
      "      institute patent litigation against any entity (including a\n",
      "      cross-claim or counterclaim in a lawsuit) alleging that the Work\n",
      "      or a Contribution incorporated within the Work constitutes direct\n",
      "      or contributory patent infringement, then any patent licenses\n",
      "      granted to You under this License for that Work shall terminate\n",
      "      as of the date such litigation is filed.\n",
      "\n",
      "   4. Redistribution. You may reproduce and distribute copies of the\n",
      "      Work or Derivative Works thereof in any medium, with or without\n",
      "      modifications, and in Source or Object form, provided that You\n",
      "      meet the following conditions:\n",
      "\n",
      "      (a) You must give any other recipients of the Work or\n",
      "          Derivative Works a copy of this License; and\n",
      "\n",
      "      (b) You must cause any modified files to carry prominent notices\n",
      "          stating that You changed the files; and\n",
      "\n",
      "      (c) You must retain, in the Source form of any Derivative Works\n",
      "          that You distribute, all copyright, patent, trademark, and\n",
      "          attribution notices from the Source form of the Work,\n",
      "          excluding those notices that do not pertain to any part of\n",
      "          the Derivative Works; and\n",
      "\n",
      "      (d) If the Work includes a \"NOTICE\" text file as part of its\n",
      "          distribution, then any Derivative Works that You distribute must\n",
      "          include a readable copy of the attribution notices contained\n",
      "          within such NOTICE file, excluding those notices that do not\n",
      "          pertain to any part of the Derivative Works, in at least one\n",
      "          of the following places: within a NOTICE text file distributed\n",
      "          as part of the Derivative Works; within the Source form or\n",
      "          documentation, if provided along with the Derivative Works; or,\n",
      "          within a display generated by the Derivative Works, if and\n",
      "          wherever such third-party notices normally appear. The contents\n",
      "          of the NOTICE file are for informational purposes only and\n",
      "          do not modify the License. You may add Your own attribution\n",
      "          notices within Derivative Works that You distribute, alongside\n",
      "          or as an addendum to the NOTICE text from the Work, provided\n",
      "          that such additional attribution notices cannot be construed\n",
      "          as modifying the License.\n",
      "\n",
      "      You may add Your own copyright statement to Your modifications and\n",
      "      may provide additional or different license terms and conditions\n",
      "      for use, reproduction, or distribution of Your modifications, or\n",
      "      for any such Derivative Works as a whole, provided Your use,\n",
      "      reproduction, and distribution of the Work otherwise complies with\n",
      "      the conditions stated in this License.\n",
      "\n",
      "   5. Submission of Contributions. Unless You explicitly state otherwise,\n",
      "      any Contribution intentionally submitted for inclusion in the Work\n",
      "      by You to the Licensor shall be under the terms and conditions of\n",
      "      this License, without any additional terms or conditions.\n",
      "      Notwithstanding the above, nothing herein shall supersede or modify\n",
      "      the terms of any separate license agreement you may have executed\n",
      "      with Licensor regarding such Contributions.\n",
      "\n",
      "   6. Trademarks. This License does not grant permission to use the trade\n",
      "      names, trademarks, service marks, or product names of the Licensor,\n",
      "      except as required for reasonable and customary use in describing the\n",
      "      origin of the Work and reproducing the content of the NOTICE file.\n",
      "\n",
      "   7. Disclaimer of Warranty. Unless required by applicable law or\n",
      "      agreed to in writing, Licensor provides the Work (and each\n",
      "      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n",
      "      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n",
      "      implied, including, without limitation, any warranties or conditions\n",
      "      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n",
      "      PARTICULAR PURPOSE. You are solely responsible for determining the\n",
      "      appropriateness of using or redistributing the Work and assume any\n",
      "      risks associated with Your exercise of permissions under this License.\n",
      "\n",
      "   8. Limitation of Liability. In no event and under no legal theory,\n",
      "      whether in tort (including negligence), contract, or otherwise,\n",
      "      unless required by applicable law (such as deliberate and grossly\n",
      "      negligent acts) or agreed to in writing, shall any Contributor be\n",
      "      liable to You for damages, including any direct, indirect, special,\n",
      "      incidental, or consequential damages of any character arising as a\n",
      "      result of this License or out of the use or inability to use the\n",
      "      Work (including but not limited to damages for loss of goodwill,\n",
      "      work stoppage, computer failure or malfunction, or any and all\n",
      "      other commercial damages or losses), even if such Contributor\n",
      "      has been advised of the possibility of such damages.\n",
      "\n",
      "   9. Accepting Warranty or Additional Liability. While redistributing\n",
      "      the Work or Derivative Works thereof, You may choose to offer,\n",
      "      and charge a fee for, acceptance of support, warranty, indemnity,\n",
      "      or other liability obligations and/or rights consistent with this\n",
      "      License. However, in accepting such obligations, You may act only\n",
      "      on Your own behalf and on Your sole responsibility, not on behalf\n",
      "      of any other Contributor, and only if You agree to indemnify,\n",
      "      defend, and hold each Contributor harmless for any liability\n",
      "      incurred by, or claims asserted against, such Contributor by reason\n",
      "      of your accepting any such warranty or additional liability.\n",
      "\n",
      "   END OF TERMS AND CONDITIONS\n",
      "\n",
      "   APPENDIX: How to apply the Apache License to your work.\n",
      "\n",
      "      To apply the Apache License to your work, attach the following\n",
      "      boilerplate notice, with the fields enclosed by brackets \"[]\"\n",
      "      replaced with your own identifying information. (Don't include\n",
      "      the brackets!)  The text should be enclosed in the appropriate\n",
      "      comment syntax for the file format. We also recommend that a\n",
      "      file or class name and description of purpose be included on the\n",
      "      same \"printed page\" as the copyright notice for easier\n",
      "      identification within third-party archives.\n",
      "\n",
      "   Copyright [yyyy] [name of copyright owner]\n",
      "\n",
      "   Licensed under the Apache License, Version 2.0 (the \"License\");\n",
      "   you may not use this file except in compliance with the License.\n",
      "   You may obtain a copy of the License at\n",
      "\n",
      "       http://www.apache.org/licenses/LICENSE-2.0\n",
      "\n",
      "   Unless required by applicable law or agreed to in writing, software\n",
      "   distributed under the License is distributed on an \"AS IS\" BASIS,\n",
      "   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
      "   See the License for the specific language governing permissions and\n",
      "   limitations under the License.\n",
      "\n",
      "--------------------------------------------------------------------------------\n",
      "This product bundles various third-party components under other open source\n",
      "licenses. This section summarizes those components and their licenses.\n",
      "See licenses/ for text of these licenses.\n",
      "\n",
      "\n",
      "Apache Software Foundation License 2.0\n",
      "--------------------------------------\n",
      "\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java\n",
      "\n",
      "\n",
      "BSD 2-Clause\n",
      "------------\n",
      "\n",
      "hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.{c|h}\n",
      "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h\n",
      "\n",
      "\n",
      "BSD 3-Clause\n",
      "------------\n",
      "\n",
      "hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/*\n",
      "hadoop-common-project/hadoop-common/src/main/native/gtest/gtest-all.cc\n",
      "hadoop-common-project/hadoop-common/src/main/native/gtest/include/gtest/gtest.h\n",
      "hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_x86.c\n",
      "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/protobuf/protobuf/cpp_helpers.h\n",
      "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/gmock-1.7.0/*/*.{cc|h}\n",
      "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-v4.1.1.min.js\n",
      "\n",
      "\n",
      "MIT License\n",
      "-----------\n",
      "\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/bootstrap-3.4.1\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js\n",
      "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js\n",
      "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js\n",
      "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css\n",
      "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/node_modules/.bin/r.js\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/*\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL\n",
      "\n",
      "uriparser2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2)\n",
      "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/cJSON.[ch]\n",
      "\n",
      "Boost Software License, Version 1.0\n",
      "-------------\n",
      "asio-1.10.2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/asio-1.10.2)\n",
      "rapidxml-1.13 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/rapidxml-1.13)\n",
      "tr2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/tr2)\n",
      "\n",
      "Public Domain\n",
      "-------------\n",
      "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/json-bignum.js\n"
     ]
    }
   ],
   "source": [
    "# for reading: open_input_file, for writing: open_output_stream\n",
    "\n",
    "with hdfs.open_input_file(\"/data/test.txt\") as f:\n",
    "    #print(f.read_at(200, 0))\n",
    "    reader = TextIOWrapper(BufferedReader(f))\n",
    "    for line in reader:\n",
    "        print(line, end=\"\")\n",
    "    # for P4: pq.read_table(f) OR pq.write_table(????, f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}