{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "1ed33b62-f09e-44e5-84c2-9b7be07998be", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lec1.ipynb lec2.ipynb\n" ] } ], "source": [ "! ls" ] }, { "cell_type": "markdown", "id": "3c8a9067-a138-43b9-ae21-e6bc6eb5a133", "metadata": {}, "source": [ "# Command Line Examples" ] }, { "cell_type": "code", "execution_count": 6, "id": "11602a88-79ab-4d51-8def-ab1b3bca6a31", "metadata": {}, "outputs": [], "source": [ "# -p: do not fail when the directory already exists, so the notebook can be re-run\n", "!hdfs dfs -mkdir -p hdfs://main:9000/data" ] }, { "cell_type": "code", "execution_count": 7, "id": "59dca956-2612-49b6-a94c-92dc1dbe4df7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found 1 items\n", "drwxr-xr-x - root supergroup 0 2025-03-05 17:12 hdfs://main:9000/data\n" ] } ], "source": [ "!hdfs dfs -ls hdfs://main:9000/" ] }, { "cell_type": "code", "execution_count": 11, "id": "9946811e-f9bb-4c50-a39a-2ebddb543ffa", "metadata": {}, "outputs": [], "source": [ "# -f: overwrite the destination if it is already there, so a re-run does not stop on 'File exists'\n", "!hdfs dfs -cp -f /hadoop-3.3.6/LICENSE.txt hdfs://main:9000/data/" ] }, { "cell_type": "code", "execution_count": 12, "id": "9faef755-777b-47f2-91e3-83366d97d552", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found 1 items\n", "-rw-r--r-- 3 root supergroup 15217 2025-03-05 17:13 hdfs://main:9000/data/LICENSE.txt\n" ] } ], "source": [ "!hdfs dfs -ls hdfs://main:9000/data" ] }, { "cell_type": "code", "execution_count": 13, "id": "64807b1e-f323-4271-bf19-7a3aed50cc86", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Apache License\n", " Version 2.0, January 2004\n", " http://www.apache.org/licenses/\n", "\n", " TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n", "\n", " 1. 
Definitions.\n", "\n", " \"License\" shall mean the terms and conditions for use, reproduction,\n", " and distribution as defined by Sections 1 through 9 of this document.\n", "\n", " \"Licensor\" shall mean the copyright owner or entity authorized by\n", " the copyright owner that is granting the License.\n", "\n", " \"Legal Entity\" shall mean the union of the acting entity and all\n", " other entities that control, are controlled by, or are under common\n", " control with that entity. For the purposes of this definition,\n", " \"control\" means (i) the power, direct or indirect, to cause the\n", " direction or management of such entity, whether by contract or\n", " otherwise, or (ii) ownership of fifty percent (50%) or more of the\n", " outstanding shares, or (iii) beneficial ownership of such entity.\n", "\n", " \"You\" (or \"Your\") shall mean an individual or Legal Entity\n", " exercising permissions granted by this License.\n", "\n", " \"Source\" form shall mean the preferred form for making modifications,\n", " including but not limited to software source code, documentation\n", " source, and configuration files.\n", "\n", " \"Object\" form shall mean any form resulting from mechanical\n", " transformation or translation of a Source form, including but\n", " not limited to compiled object code, generated documentation,\n", " and conversions to other media types.\n", "\n", " \"Work\" shall mean the work of authorship, whether in Source or\n", " Object form, made available under the License, as indicated by a\n", " copyright notice that is included in or attached to the work\n", " (an example is provided in the Appendix below).\n", "\n", " \"Derivative Works\" shall mean any work, whether in Source or Object\n", " form, that is based on (or derived from) the Work and for which the\n", " editorial revisions, annotations, elaborations, or other modifications\n", " represent, as a whole, an original work of authorship. 
For the purposes\n", " of this License, Derivative Works shall not include works that remain\n", " separable from, or merely link (or bind by name) to the interfaces of,\n", " the Work and Derivative Works thereof.\n", "\n", " \"Contribution\" shall mean any work of authorship, including\n", " the original version of the Work and any modifications or additions\n", " to that Work or Derivative Works thereof, that is intentionally\n", " submitted to Licensor for inclusion in the Work by the copyright owner\n", " or by an individual or Legal Entity authorized to submit on behalf of\n", " the copyright owner. For the purposes of this definition, \"submitted\"\n", " means any form of electronic, verbal, or written communication sent\n", " to the Licensor or its representatives, including but not limited to\n", " communication on electronic mailing lists, source code control systems,\n", " and issue tracking systems that are managed by, or on behalf of, the\n", " Licensor for the purpose of discussing and improving the Work, but\n", " excluding communication that is conspicuously marked or otherwise\n", " designated in writing by the copyright owner as \"Not a Contribution.\"\n", "\n", " \"Contributor\" shall mean Licensor and any individual or Legal Entity\n", " on behalf of whom a Contribution has been received by Licensor and\n", " subsequently incorporated within the Work.\n", "\n", " 2. Grant of Copyright License. Subject to the terms and conditions of\n", " this License, each Contributor hereby grants to You a perpetual,\n", " worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n", " copyright license to reproduce, prepare Derivative Works of,\n", " publicly display, publicly perform, sublicense, and distribute the\n", " Work and such Derivative Works in Source or Object form.\n", "\n", " 3. Grant of Patent License. 
Subject to the terms and conditions of\n", " this License, each Contributor hereby grants to You a perpetual,\n", " worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n", " (except as stated in this section) patent license to make, have made,\n", " use, offer to sell, sell, import, and otherwise transfer the Work,\n", " where such license applies only to those patent claims licensable\n", " by such Contributor that are necessarily infringed by their\n", " Contribution(s) alone or by combination of their Contribution(s)\n", " with the Work to which such Contribution(s) was submitted. If You\n", " institute patent litigation against any entity (including a\n", " cross-claim or counterclaim in a lawsuit) alleging that the Work\n", " or a Contribution incorporated within the Work constitutes direct\n", " or contributory patent infringement, then any patent licenses\n", " granted to You under this License for that Work shall terminate\n", " as of the date such litigation is filed.\n", "\n", " 4. Redistribution. 
You may reproduce and distribute copies of the\n", " Work or Derivative Works thereof in any medium, with or without\n", " modifications, and in Source or Object form, provided that You\n", " meet the following conditions:\n", "\n", " (a) You must give any other recipients of the Work or\n", " Derivative Works a copy of this License; and\n", "\n", " (b) You must cause any modified files to carry prominent notices\n", " stating that You changed the files; and\n", "\n", " (c) You must retain, in the Source form of any Derivative Works\n", " that You distribute, all copyright, patent, trademark, and\n", " attribution notices from the Source form of the Work,\n", " excluding those notices that do not pertain to any part of\n", " the Derivative Works; and\n", "\n", " (d) If the Work includes a \"NOTICE\" text file as part of its\n", " distribution, then any Derivative Works that You distribute must\n", " include a readable copy of the attribution notices contained\n", " within such NOTICE file, excluding those notices that do not\n", " pertain to any part of the Derivative Works, in at least one\n", " of the following places: within a NOTICE text file distributed\n", " as part of the Derivative Works; within the Source form or\n", " documentation, if provided along with the Derivative Works; or,\n", " within a display generated by the Derivative Works, if and\n", " wherever such third-party notices normally appear. The contents\n", " of the NOTICE file are for informational purposes only and\n", " do not modify the License. 
You may add Your own attribution\n", " notices within Derivative Works that You distribute, alongside\n", " or as an addendum to the NOTICE text from the Work, provided\n", " that such additional attribution notices cannot be construed\n", " as modifying the License.\n", "\n", " You may add Your own copyright statement to Your modifications and\n", " may provide additional or different license terms and conditions\n", " for use, reproduction, or distribution of Your modifications, or\n", " for any such Derivative Works as a whole, provided Your use,\n", " reproduction, and distribution of the Work otherwise complies with\n", " the conditions stated in this License.\n", "\n", " 5. Submission of Contributions. Unless You explicitly state otherwise,\n", " any Contribution intentionally submitted for inclusion in the Work\n", " by You to the Licensor shall be under the terms and conditions of\n", " this License, without any additional terms or conditions.\n", " Notwithstanding the above, nothing herein shall supersede or modify\n", " the terms of any separate license agreement you may have executed\n", " with Licensor regarding such Contributions.\n", "\n", " 6. Trademarks. This License does not grant permission to use the trade\n", " names, trademarks, service marks, or product names of the Licensor,\n", " except as required for reasonable and customary use in describing the\n", " origin of the Work and reproducing the content of the NOTICE file.\n", "\n", " 7. Disclaimer of Warranty. Unless required by applicable law or\n", " agreed to in writing, Licensor provides the Work (and each\n", " Contributor provides its Contributions) on an \"AS IS\" BASIS,\n", " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n", " implied, including, without limitation, any warranties or conditions\n", " of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n", " PARTICULAR PURPOSE. 
You are solely responsible for determining the\n", " appropriateness of using or redistributing the Work and assume any\n", " risks associated with Your exercise of permissions under this License.\n", "\n", " 8. Limitation of Liability. In no event and under no legal theory,\n", " whether in tort (including negligence), contract, or otherwise,\n", " unless required by applicable law (such as deliberate and grossly\n", " negligent acts) or agreed to in writing, shall any Contributor be\n", " liable to You for damages, including any direct, indirect, special,\n", " incidental, or consequential damages of any character arising as a\n", " result of this License or out of the use or inability to use the\n", " Work (including but not limited to damages for loss of goodwill,\n", " work stoppage, computer failure or malfunction, or any and all\n", " other commercial damages or losses), even if such Contributor\n", " has been advised of the possibility of such damages.\n", "\n", " 9. Accepting Warranty or Additional Liability. While redistributing\n", " the Work or Derivative Works thereof, You may choose to offer,\n", " and charge a fee for, acceptance of support, warranty, indemnity,\n", " or other liability obligations and/or rights consistent with this\n", " License. However, in accepting such obligations, You may act only\n", " on Your own behalf and on Your sole responsibility, not on behalf\n", " of any other Contributor, and only if You agree to indemnify,\n", " defend, and hold each Contributor harmless for any liability\n", " incurred by, or claims asserted against, such Contributor by reason\n", " of your accepting any such warranty or additional liability.\n", "\n", " END OF TERMS AND CONDITIONS\n", "\n", " APPENDIX: How to apply the Apache License to your work.\n", "\n", " To apply the Apache License to your work, attach the following\n", " boilerplate notice, with the fields enclosed by brackets \"[]\"\n", " replaced with your own identifying information. 
(Don't include\n", " the brackets!) The text should be enclosed in the appropriate\n", " comment syntax for the file format. We also recommend that a\n", " file or class name and description of purpose be included on the\n", " same \"printed page\" as the copyright notice for easier\n", " identification within third-party archives.\n", "\n", " Copyright [yyyy] [name of copyright owner]\n", "\n", " Licensed under the Apache License, Version 2.0 (the \"License\");\n", " you may not use this file except in compliance with the License.\n", " You may obtain a copy of the License at\n", "\n", " http://www.apache.org/licenses/LICENSE-2.0\n", "\n", " Unless required by applicable law or agreed to in writing, software\n", " distributed under the License is distributed on an \"AS IS\" BASIS,\n", " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", " See the License for the specific language governing permissions and\n", " limitations under the License.\n", "\n", "--------------------------------------------------------------------------------\n", "This product bundles various third-party components under other open source\n", "licenses. 
This section summarizes those components and their licenses.\n", "See licenses/ for text of these licenses.\n", "\n", "\n", "Apache Software Foundation License 2.0\n", "--------------------------------------\n", "\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java\n", "\n", "\n", "BSD 2-Clause\n", "------------\n", "\n", "hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.{c|h}\n", "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h\n", "\n", "\n", "BSD 3-Clause\n", "------------\n", "\n", "hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/*\n", "hadoop-common-project/hadoop-common/src/main/native/gtest/gtest-all.cc\n", "hadoop-common-project/hadoop-common/src/main/native/gtest/include/gtest/gtest.h\n", "hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_x86.c\n", "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/protobuf/protobuf/cpp_helpers.h\n", "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/gmock-1.7.0/*/*.{cc|h}\n", "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-v4.1.1.min.js\n", "\n", "\n", "MIT License\n", "-----------\n", "\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/bootstrap-3.4.1\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js\n", 
"hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js\n", "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js\n", "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js\n", "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css\n", "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/node_modules/.bin/r.js\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/*\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL\n", "\n", "uriparser2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2)\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/cJSON.[ch]\n", "\n", "Boost Software License, Version 1.0\n", "-------------\n", "asio-1.10.2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/asio-1.10.2)\n", "rapidxml-1.13 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/rapidxml-1.13)\n", "tr2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/tr2)\n", "\n", "Public Domain\n", "-------------\n", 
"hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/json-bignum.js\n" ] } ], "source": [ "!hdfs dfs -cat hdfs://main:9000/data/LICENSE.txt" ] }, { "cell_type": "code", "execution_count": 15, "id": "7649de66-99fc-468f-b3bb-d53ac6104651", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "14.9 K 44.6 K hdfs://main:9000/data/LICENSE.txt\n" ] } ], "source": [ "!hdfs dfs -du -h hdfs://main:9000/data/LICENSE.txt" ] }, { "cell_type": "code", "execution_count": 16, "id": "077c4587-942a-40d8-80be-9c663ee10c3f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Connecting to namenode via http://main:9870/fsck?ugi=root&path=%2Fdata%2FLICENSE.txt\n", "FSCK started by root (auth:SIMPLE) from /172.18.0.3 for path /data/LICENSE.txt at Wed Mar 05 17:16:04 GMT 2025\n", "\n", "\n", "/data/LICENSE.txt: Under replicated BP-478178705-172.18.0.2-1741194447522:blk_1073741825_1001. Target Replicas is 3 but found 1 live replica(s), 0 decommissioned replica(s), 0 decommissioning replica(s).\n", "\n", "Status: HEALTHY\n", " Number of data-nodes:\t1\n", " Number of racks:\t\t1\n", " Total dirs:\t\t\t0\n", " Total symlinks:\t\t0\n", "\n", "Replicated Blocks:\n", " Total size:\t15217 B\n", " Total files:\t1\n", " Total blocks (validated):\t1 (avg. 
block size 15217 B)\n", " Minimally replicated blocks:\t1 (100.0 %)\n", " Over-replicated blocks:\t0 (0.0 %)\n", " Under-replicated blocks:\t1 (100.0 %)\n", " Mis-replicated blocks:\t\t0 (0.0 %)\n", " Default replication factor:\t3\n", " Average block replication:\t1.0\n", " Missing blocks:\t\t0\n", " Corrupt blocks:\t\t0\n", " Missing replicas:\t\t2 (66.666664 %)\n", " Blocks queued for replication:\t0\n", "\n", "Erasure Coded Block Groups:\n", " Total size:\t0 B\n", " Total files:\t0\n", " Total block groups (validated):\t0\n", " Minimally erasure-coded block groups:\t0\n", " Over-erasure-coded block groups:\t0\n", " Under-erasure-coded block groups:\t0\n", " Unsatisfactory placement block groups:\t0\n", " Average block group size:\t0.0\n", " Missing block groups:\t\t0\n", " Corrupt block groups:\t\t0\n", " Missing internal blocks:\t0\n", " Blocks queued for replication:\t0\n", "FSCK ended at Wed Mar 05 17:16:04 GMT 2025 in 13 milliseconds\n", "\n", "\n", "The filesystem under path '/data/LICENSE.txt' is HEALTHY\n" ] } ], "source": [ "!hdfs fsck hdfs://main:9000/data/LICENSE.txt" ] }, { "cell_type": "code", "execution_count": 21, "id": "e2c9c022-158b-4181-bd4c-fa451ba6c783", "metadata": {}, "outputs": [], "source": [ "# -D dfs.replication=1 writes this copy with a single replica.\n", "# -f replaces any test.txt left by an earlier run, so the fsck below always checks the file written here.\n", "!hdfs dfs -D dfs.replication=1 -cp -f /hadoop-3.3.6/LICENSE.txt hdfs://main:9000/data/test.txt" ] }, { "cell_type": "code", "execution_count": 22, "id": "e0d4797c-8bde-4588-96af-3ad1134dcb2b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Connecting to namenode via http://main:9870/fsck?ugi=root&path=%2Fdata%2Ftest.txt\n", "FSCK started by root (auth:SIMPLE) from /172.18.0.3 for path /data/test.txt at Wed Mar 05 17:18:35 GMT 2025\n", "\n", "\n", "Status: HEALTHY\n", " Number of data-nodes:\t1\n", " Number of racks:\t\t1\n", " Total dirs:\t\t\t0\n", " Total symlinks:\t\t0\n", "\n", "Replicated Blocks:\n", " Total size:\t15217 B\n", " Total files:\t1\n", " Total blocks (validated):\t1 (avg. 
block size 15217 B)\n", " Minimally replicated blocks:\t1 (100.0 %)\n", " Over-replicated blocks:\t0 (0.0 %)\n", " Under-replicated blocks:\t0 (0.0 %)\n", " Mis-replicated blocks:\t\t0 (0.0 %)\n", " Default replication factor:\t3\n", " Average block replication:\t1.0\n", " Missing blocks:\t\t0\n", " Corrupt blocks:\t\t0\n", " Missing replicas:\t\t0 (0.0 %)\n", " Blocks queued for replication:\t0\n", "\n", "Erasure Coded Block Groups:\n", " Total size:\t0 B\n", " Total files:\t0\n", " Total block groups (validated):\t0\n", " Minimally erasure-coded block groups:\t0\n", " Over-erasure-coded block groups:\t0\n", " Under-erasure-coded block groups:\t0\n", " Unsatisfactory placement block groups:\t0\n", " Average block group size:\t0.0\n", " Missing block groups:\t\t0\n", " Corrupt block groups:\t\t0\n", " Missing internal blocks:\t0\n", " Blocks queued for replication:\t0\n", "FSCK ended at Wed Mar 05 17:18:35 GMT 2025 in 2 milliseconds\n", "\n", "\n", "The filesystem under path '/data/test.txt' is HEALTHY\n" ] } ], "source": [ "!hdfs fsck hdfs://main:9000/data/test.txt" ] }, { "cell_type": "markdown", "id": "50efdb28-cfdb-4172-8904-7718fce705fc", "metadata": {}, "source": [ "# WebHDFS Examples" ] }, { "cell_type": "code", "execution_count": 24, "id": "ff78a66f-9300-463d-a508-677231832932", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "HTTP/1.1 200 OK\n", "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:22:12 GMT\n", "\u001b[1mCache-Control\u001b[0m: no-cache\n", "\u001b[1mExpires\u001b[0m: Wed, 05 Mar 2025 17:22:12 GMT\n", "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:22:12 GMT\n", "\u001b[1mPragma\u001b[0m: no-cache\n", "\u001b[1mX-Content-Type-Options\u001b[0m: nosniff\n", "\u001b[1mX-FRAME-OPTIONS\u001b[0m: SAMEORIGIN\n", "\u001b[1mX-XSS-Protection\u001b[0m: 1; mode=block\n", "\u001b[1mContent-Type\u001b[0m: application/json\n", "\u001b[1mTransfer-Encoding\u001b[0m: chunked\n", "\n", "{\"FileStatuses\":{\"FileStatus\":[\n", 
"{\"accessTime\":1741194819485,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16387,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741194820075,\"owner\":\"root\",\"pathSuffix\":\"LICENSE.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\n", "{\"accessTime\":1741195034403,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16388,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195034487,\"owner\":\"root\",\"pathSuffix\":\"single.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\n", "{\"accessTime\":1741195111543,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16389,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195111626,\"owner\":\"root\",\"pathSuffix\":\"test.txt\",\"permission\":\"644\",\"replication\":1,\"storagePolicy\":0,\"type\":\"FILE\"}\n", "]}}\n" ] } ], "source": [ "! curl -i \"http://main:9870/webhdfs/v1/data?op=LISTSTATUS\"" ] }, { "cell_type": "markdown", "id": "3d957b70-a8e0-4147-838d-e4e9eb137ed4", "metadata": {}, "source": [ "The same `LISTSTATUS` call from Python: the `requests` equivalent of `curl -i \"http://main:9870/webhdfs/v1/data?op=LISTSTATUS\"`." ] }, { "cell_type": "code", "execution_count": 31, "id": "42dc77db-565e-4359-a52f-b869a642ad08", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"b'{\"FileStatuses\":{\"FileStatus\":[\\n{\"accessTime\":1741194819485,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16387,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741194820075,\"owner\":\"root\",\"pathSuffix\":\"LICENSE.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195034403,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16388,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195034487,\"owner\":\"root\",\"pathSuffix\":\"single.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195111543,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16389,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195111626,\"owner\":\"root\",\"pathSuffix\":\"test.txt\",\"permission\":\"644\",\"replication\":1,\"storagePolicy\":0,\"type\":\"FILE\"}\\n]}}\\n'" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import requests\n", "\n", "# requests waits forever unless told otherwise; bound the wait so an unreachable server cannot hang the kernel\n", "r = requests.get(\"http://main:9870/webhdfs/v1/data?op=LISTSTATUS\", timeout=10)\n", "r.raise_for_status()  # turn HTTP 4xx/5xx into an exception instead of parsing an error body\n", "r.content" ] }, { "cell_type": "code", "execution_count": 32, "id": "6dc4b0f6-a691-4e19-8d5c-0715d7a46cd1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"'{\"FileStatuses\":{\"FileStatus\":[\\n{\"accessTime\":1741194819485,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16387,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741194820075,\"owner\":\"root\",\"pathSuffix\":\"LICENSE.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195034403,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16388,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195034487,\"owner\":\"root\",\"pathSuffix\":\"single.txt\",\"permission\":\"644\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"},\\n{\"accessTime\":1741195111543,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16389,\"group\":\"supergroup\",\"length\":15217,\"modificationTime\":1741195111626,\"owner\":\"root\",\"pathSuffix\":\"test.txt\",\"permission\":\"644\",\"replication\":1,\"storagePolicy\":0,\"type\":\"FILE\"}\\n]}}\\n'" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r.text" ] }, { "cell_type": "code", "execution_count": 40, "id": "e04a4e12-6478-435c-a07e-dc7b024b0719", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LICENSE.txt\n", "single.txt\n", "test.txt\n" ] } ], "source": [ "for entry in r.json()['FileStatuses']['FileStatus']:\n", "    print(entry[\"pathSuffix\"])" ] }, { "cell_type": "markdown", "id": "5800cd5f-b93b-4c42-af89-5558075b1f1d", "metadata": {}, "source": [ "## Open and read a file" ] }, { "cell_type": "markdown", "id": "1addea41-bc58-40c7-93ca-52f0b6bc1f9a", "metadata": {}, "source": [ "WebHDFS `OPEN` request template (bracketed parameters are optional):\n", "\n", "```bash\n", "curl -i -L \"http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN\n", "    [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>][&noredirect=<true|false>]\"\n", "```" ] }, { "cell_type": "code", "execution_count": 47, "id": "a9602f15-a008-43b0-b76c-2ba0da2c3685", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "HTTP/1.1 307 Temporary Redirect\n", "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:31:15 GMT\n", "\u001b[1mCache-Control\u001b[0m: no-cache\n", "\u001b[1mExpires\u001b[0m: Wed, 05 Mar 2025 17:31:15 GMT\n", "\u001b[1mDate\u001b[0m: Wed, 05 Mar 2025 17:31:15 GMT\n", "\u001b[1mPragma\u001b[0m: no-cache\n", "\u001b[1mX-Content-Type-Options\u001b[0m: nosniff\n", "\u001b[1mX-FRAME-OPTIONS\u001b[0m: SAMEORIGIN\n", "\u001b[1mX-XSS-Protection\u001b[0m: 1; mode=block\n", "\u001b[1mLocation\u001b[0m: \u001b]8;;http://main:9864/webhdfs/v1/data/test.txt?op=OPEN&namenoderpcaddress=main:9000&length=200&offset=0\u001b\\http://main:9864/webhdfs/v1/data/test.txt?op=OPEN&namenoderpcaddress=main:9000&length=200&offset=0\n", "\u001b]8;;\u001b\\\u001b[1mContent-Type\u001b[0m: application/octet-stream\n", "\u001b[1mContent-Length\u001b[0m: 0\n", "\n", "HTTP/1.1 200 OK\n", "\u001b[1mAccess-Control-Allow-Methods\u001b[0m: GET\n", "\u001b[1mAccess-Control-Allow-Origin\u001b[0m: *\n", "\u001b[1mContent-Type\u001b[0m: application/octet-stream\n", "\u001b[1mConnection\u001b[0m: close\n", "\u001b[1mContent-Length\u001b[0m: 200\n", "\n", "\n", " Apache License\n", " Version 2.0, January 2004\n", " http://www.apache.org/licenses/\n", "\n", " TERMS AND CONDITIONS FOR USE, REPRODUC" ] } ], "source": [ "! 
curl -i -L \"http://main:9870/webhdfs/v1/data/test.txt?op=OPEN&offset=0&length=200\"" ] }, { "cell_type": "code", "execution_count": 48, "id": "13543696-8fff-455d-9d90-d25210893b27", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Apache License\n", " Version 2.0, January 2004\n", " http://www.apache.org/licenses/\n", "\n", " TERMS AND CONDITIONS FOR USE, REPRODUC\n" ] } ], "source": [ "# OPEN answers with a 307 redirect (visible in the curl -L output above); requests follows it automatically.\n", "# timeout bounds the wait: requests has no default timeout and would otherwise block forever.\n", "r = requests.get(\n", "    \"http://main:9870/webhdfs/v1/data/test.txt\",\n", "    params={\"op\": \"OPEN\", \"offset\": 0, \"length\": 200},\n", "    timeout=10,\n", ")\n", "r.raise_for_status()\n", "print(r.text)" ] }, { "cell_type": "code", "execution_count": 49, "id": "313a7b35-82ac-4221-86a8-8ebdf50a69bc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Apache License\n", " Version 2.0, January 2004\n", " http://www.apache.org/licenses/\n", "\n", " TERMS AND CONDITIONS FOR USE, REPRODUCTION,\n" ] } ], "source": [ "# same read, starting 5 bytes into the file (offset is the only change)\n", "r = requests.get(\n", "    \"http://main:9870/webhdfs/v1/data/test.txt\",\n", "    params={\"op\": \"OPEN\", \"offset\": 5, \"length\": 200},\n", "    timeout=10,\n", ")\n", "r.raise_for_status()\n", "print(r.text)" ] }, { "cell_type": "code", "execution_count": 53, "id": "b8cbf271-28b5-4af8-b0a8-06d09b5993fc", "metadata": {}, "outputs": [], "source": [ "# where are all the DataNodes for all the blocks of a file?" 
] }, { "cell_type": "code", "execution_count": 55, "id": "7ce73613-cc98-42f8-9c20-95eba1346240", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'BlockLocations': {'BlockLocation': [{'topologyPaths': ['/default-rack/172.18.0.2:9866'],\n", " 'corrupt': False,\n", " 'cachedHosts': [],\n", " 'names': ['172.18.0.2:9866'],\n", " 'offset': 0,\n", " 'hosts': ['main'],\n", " 'length': 15217,\n", " 'storageTypes': ['DISK']}]}}" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# timeout bounds the wait: requests has no default timeout and would otherwise block forever\n", "r = requests.get(\"http://main:9870/webhdfs/v1/data/test.txt?op=GETFILEBLOCKLOCATIONS\", timeout=10)\n", "r.raise_for_status()\n", "# keep the parsed body under its own name: `r` is reassigned by other cells\n", "block_locations = r.json()\n", "block_locations" ] }, { "cell_type": "code", "execution_count": 59, "id": "367e11ec-ef10-4a0a-b90d-ecbabc3dcb8d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DataNodes for this block: ['main']\n" ] } ], "source": [ "# one entry per block of the file; \"hosts\" lists the nodes holding that block\n", "for logical_block in block_locations['BlockLocations']['BlockLocation']:\n", "    print(\"DataNodes for this block:\", logical_block[\"hosts\"])" ] }, { "cell_type": "markdown", "id": "28d9691f-0ef6-497d-8cd5-808c46b2844c", "metadata": {}, "source": [ "# PyArrow Examples" ] }, { "cell_type": "code", "execution_count": 60, "id": "0e9e9a93-0943-44e3-b282-a978a3802d85", "metadata": {}, "outputs": [], "source": [ "import pyarrow as pa\n", "import pyarrow.fs" ] }, { "cell_type": "code", "execution_count": 61, "id": "f25a555d-93d0-4e01-8b37-bea11603ddfd", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-03-05 17:37:01,372 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable\n"
     ]
    }
   ],
   "source": [
    "# Connect to HDFS at main:9000 (the same address as the hdfs://main:9000 URLs used earlier).\n",
    "# Other keyword options to fill in as needed: replication=????, default_block_size=????\n",
    "hdfs = pa.fs.HadoopFileSystem(host=\"main\", port=9000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "fc8df378-1a0a-43cc-bf89-eada00bc3355",
   "metadata": {},
   "outputs": [],
   "source": [
    "# stdlib wrappers used in the next cell to read a binary file object as lines of text\n",
    "from io import BufferedReader, TextIOWrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "4776be11-61af-46ca-9493-4485c97589d1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " Apache License\n",
      " Version 2.0, January 2004\n",
      " http://www.apache.org/licenses/\n",
      "\n",
      " TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n",
      "\n",
      " 1. Definitions.\n",
      "\n",
      " \"License\" shall mean the terms and conditions for use, reproduction,\n",
      " and distribution as defined by Sections 1 through 9 of this document.\n",
      "\n",
      " \"Licensor\" shall mean the copyright owner or entity authorized by\n",
      " the copyright owner that is granting the License.\n",
      "\n",
      " \"Legal Entity\" shall mean the union of the acting entity and all\n",
      " other entities that control, are controlled by, or are under common\n",
      " control with that entity.
For the purposes of this definition,\n", " \"control\" means (i) the power, direct or indirect, to cause the\n", " direction or management of such entity, whether by contract or\n", " otherwise, or (ii) ownership of fifty percent (50%) or more of the\n", " outstanding shares, or (iii) beneficial ownership of such entity.\n", "\n", " \"You\" (or \"Your\") shall mean an individual or Legal Entity\n", " exercising permissions granted by this License.\n", "\n", " \"Source\" form shall mean the preferred form for making modifications,\n", " including but not limited to software source code, documentation\n", " source, and configuration files.\n", "\n", " \"Object\" form shall mean any form resulting from mechanical\n", " transformation or translation of a Source form, including but\n", " not limited to compiled object code, generated documentation,\n", " and conversions to other media types.\n", "\n", " \"Work\" shall mean the work of authorship, whether in Source or\n", " Object form, made available under the License, as indicated by a\n", " copyright notice that is included in or attached to the work\n", " (an example is provided in the Appendix below).\n", "\n", " \"Derivative Works\" shall mean any work, whether in Source or Object\n", " form, that is based on (or derived from) the Work and for which the\n", " editorial revisions, annotations, elaborations, or other modifications\n", " represent, as a whole, an original work of authorship. 
For the purposes\n", " of this License, Derivative Works shall not include works that remain\n", " separable from, or merely link (or bind by name) to the interfaces of,\n", " the Work and Derivative Works thereof.\n", "\n", " \"Contribution\" shall mean any work of authorship, including\n", " the original version of the Work and any modifications or additions\n", " to that Work or Derivative Works thereof, that is intentionally\n", " submitted to Licensor for inclusion in the Work by the copyright owner\n", " or by an individual or Legal Entity authorized to submit on behalf of\n", " the copyright owner. For the purposes of this definition, \"submitted\"\n", " means any form of electronic, verbal, or written communication sent\n", " to the Licensor or its representatives, including but not limited to\n", " communication on electronic mailing lists, source code control systems,\n", " and issue tracking systems that are managed by, or on behalf of, the\n", " Licensor for the purpose of discussing and improving the Work, but\n", " excluding communication that is conspicuously marked or otherwise\n", " designated in writing by the copyright owner as \"Not a Contribution.\"\n", "\n", " \"Contributor\" shall mean Licensor and any individual or Legal Entity\n", " on behalf of whom a Contribution has been received by Licensor and\n", " subsequently incorporated within the Work.\n", "\n", " 2. Grant of Copyright License. Subject to the terms and conditions of\n", " this License, each Contributor hereby grants to You a perpetual,\n", " worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n", " copyright license to reproduce, prepare Derivative Works of,\n", " publicly display, publicly perform, sublicense, and distribute the\n", " Work and such Derivative Works in Source or Object form.\n", "\n", " 3. Grant of Patent License. 
Subject to the terms and conditions of\n", " this License, each Contributor hereby grants to You a perpetual,\n", " worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n", " (except as stated in this section) patent license to make, have made,\n", " use, offer to sell, sell, import, and otherwise transfer the Work,\n", " where such license applies only to those patent claims licensable\n", " by such Contributor that are necessarily infringed by their\n", " Contribution(s) alone or by combination of their Contribution(s)\n", " with the Work to which such Contribution(s) was submitted. If You\n", " institute patent litigation against any entity (including a\n", " cross-claim or counterclaim in a lawsuit) alleging that the Work\n", " or a Contribution incorporated within the Work constitutes direct\n", " or contributory patent infringement, then any patent licenses\n", " granted to You under this License for that Work shall terminate\n", " as of the date such litigation is filed.\n", "\n", " 4. Redistribution. 
You may reproduce and distribute copies of the\n", " Work or Derivative Works thereof in any medium, with or without\n", " modifications, and in Source or Object form, provided that You\n", " meet the following conditions:\n", "\n", " (a) You must give any other recipients of the Work or\n", " Derivative Works a copy of this License; and\n", "\n", " (b) You must cause any modified files to carry prominent notices\n", " stating that You changed the files; and\n", "\n", " (c) You must retain, in the Source form of any Derivative Works\n", " that You distribute, all copyright, patent, trademark, and\n", " attribution notices from the Source form of the Work,\n", " excluding those notices that do not pertain to any part of\n", " the Derivative Works; and\n", "\n", " (d) If the Work includes a \"NOTICE\" text file as part of its\n", " distribution, then any Derivative Works that You distribute must\n", " include a readable copy of the attribution notices contained\n", " within such NOTICE file, excluding those notices that do not\n", " pertain to any part of the Derivative Works, in at least one\n", " of the following places: within a NOTICE text file distributed\n", " as part of the Derivative Works; within the Source form or\n", " documentation, if provided along with the Derivative Works; or,\n", " within a display generated by the Derivative Works, if and\n", " wherever such third-party notices normally appear. The contents\n", " of the NOTICE file are for informational purposes only and\n", " do not modify the License. 
You may add Your own attribution\n", " notices within Derivative Works that You distribute, alongside\n", " or as an addendum to the NOTICE text from the Work, provided\n", " that such additional attribution notices cannot be construed\n", " as modifying the License.\n", "\n", " You may add Your own copyright statement to Your modifications and\n", " may provide additional or different license terms and conditions\n", " for use, reproduction, or distribution of Your modifications, or\n", " for any such Derivative Works as a whole, provided Your use,\n", " reproduction, and distribution of the Work otherwise complies with\n", " the conditions stated in this License.\n", "\n", " 5. Submission of Contributions. Unless You explicitly state otherwise,\n", " any Contribution intentionally submitted for inclusion in the Work\n", " by You to the Licensor shall be under the terms and conditions of\n", " this License, without any additional terms or conditions.\n", " Notwithstanding the above, nothing herein shall supersede or modify\n", " the terms of any separate license agreement you may have executed\n", " with Licensor regarding such Contributions.\n", "\n", " 6. Trademarks. This License does not grant permission to use the trade\n", " names, trademarks, service marks, or product names of the Licensor,\n", " except as required for reasonable and customary use in describing the\n", " origin of the Work and reproducing the content of the NOTICE file.\n", "\n", " 7. Disclaimer of Warranty. Unless required by applicable law or\n", " agreed to in writing, Licensor provides the Work (and each\n", " Contributor provides its Contributions) on an \"AS IS\" BASIS,\n", " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n", " implied, including, without limitation, any warranties or conditions\n", " of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n", " PARTICULAR PURPOSE. 
You are solely responsible for determining the\n", " appropriateness of using or redistributing the Work and assume any\n", " risks associated with Your exercise of permissions under this License.\n", "\n", " 8. Limitation of Liability. In no event and under no legal theory,\n", " whether in tort (including negligence), contract, or otherwise,\n", " unless required by applicable law (such as deliberate and grossly\n", " negligent acts) or agreed to in writing, shall any Contributor be\n", " liable to You for damages, including any direct, indirect, special,\n", " incidental, or consequential damages of any character arising as a\n", " result of this License or out of the use or inability to use the\n", " Work (including but not limited to damages for loss of goodwill,\n", " work stoppage, computer failure or malfunction, or any and all\n", " other commercial damages or losses), even if such Contributor\n", " has been advised of the possibility of such damages.\n", "\n", " 9. Accepting Warranty or Additional Liability. While redistributing\n", " the Work or Derivative Works thereof, You may choose to offer,\n", " and charge a fee for, acceptance of support, warranty, indemnity,\n", " or other liability obligations and/or rights consistent with this\n", " License. However, in accepting such obligations, You may act only\n", " on Your own behalf and on Your sole responsibility, not on behalf\n", " of any other Contributor, and only if You agree to indemnify,\n", " defend, and hold each Contributor harmless for any liability\n", " incurred by, or claims asserted against, such Contributor by reason\n", " of your accepting any such warranty or additional liability.\n", "\n", " END OF TERMS AND CONDITIONS\n", "\n", " APPENDIX: How to apply the Apache License to your work.\n", "\n", " To apply the Apache License to your work, attach the following\n", " boilerplate notice, with the fields enclosed by brackets \"[]\"\n", " replaced with your own identifying information. 
(Don't include\n", " the brackets!) The text should be enclosed in the appropriate\n", " comment syntax for the file format. We also recommend that a\n", " file or class name and description of purpose be included on the\n", " same \"printed page\" as the copyright notice for easier\n", " identification within third-party archives.\n", "\n", " Copyright [yyyy] [name of copyright owner]\n", "\n", " Licensed under the Apache License, Version 2.0 (the \"License\");\n", " you may not use this file except in compliance with the License.\n", " You may obtain a copy of the License at\n", "\n", " http://www.apache.org/licenses/LICENSE-2.0\n", "\n", " Unless required by applicable law or agreed to in writing, software\n", " distributed under the License is distributed on an \"AS IS\" BASIS,\n", " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", " See the License for the specific language governing permissions and\n", " limitations under the License.\n", "\n", "--------------------------------------------------------------------------------\n", "This product bundles various third-party components under other open source\n", "licenses. 
This section summarizes those components and their licenses.\n", "See licenses/ for text of these licenses.\n", "\n", "\n", "Apache Software Foundation License 2.0\n", "--------------------------------------\n", "\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/AbstractFuture.java\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java\n", "\n", "\n", "BSD 2-Clause\n", "------------\n", "\n", "hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/lz4/lz4.{c|h}\n", "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h\n", "\n", "\n", "BSD 3-Clause\n", "------------\n", "\n", "hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/bloom/*\n", "hadoop-common-project/hadoop-common/src/main/native/gtest/gtest-all.cc\n", "hadoop-common-project/hadoop-common/src/main/native/gtest/include/gtest/gtest.h\n", "hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_x86.c\n", "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/protobuf/protobuf/cpp_helpers.h\n", "hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/gmock-1.7.0/*/*.{cc|h}\n", "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-v4.1.1.min.js\n", "\n", "\n", "MIT License\n", "-----------\n", "\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/bootstrap-3.4.1\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js\n", 
"hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js\n", "hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js\n", "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js\n", "hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js\n", "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css\n", "hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/node_modules/.bin/r.js\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/*\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL\n", "\n", "uriparser2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/uriparser2)\n", "hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/cJSON.[ch]\n", "\n", "Boost Software License, Version 1.0\n", "-------------\n", "asio-1.10.2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/asio-1.10.2)\n", "rapidxml-1.13 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/rapidxml-1.13)\n", "tr2 (hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/third_party/tr2)\n", "\n", "Public Domain\n", "-------------\n", 
"hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/json-bignum.js\n" ] } ], "source": [ "# for reading: open_input_file, for writing: open_output_stream\n", "\n", "with hdfs.open_input_file(\"/data/test.txt\") as f:\n", " #print(f.read_at(200, 0))\n", " reader = TextIOWrapper(BufferedReader(f))\n", " for line in reader:\n", " print(line, end=\"\")\n", " # for P4: pq.read_table(f) OR pq.write_table(????, f)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }