From aec1a11b1adc272daaf0e6a57e1d6f48d5f8b9ff Mon Sep 17 00:00:00 2001 From: ashwinmaran <amaran@wisc.edu> Date: Wed, 5 Apr 2023 09:40:08 -0500 Subject: [PATCH] add p10 and lab-p10 --- lab-p10/README.md | 69 + lab-p10/images/files.jpg | Bin 0 -> 7282 bytes lab-p10/practice.ipynb | 3009 ++++++++++++++++++++++++++++++++++++++ lab-p10/practice_test.py | 513 +++++++ lab-p10/small_data.zip | Bin 0 -> 3468 bytes 5 files changed, 3591 insertions(+) create mode 100644 lab-p10/README.md create mode 100644 lab-p10/images/files.jpg create mode 100644 lab-p10/practice.ipynb create mode 100644 lab-p10/practice_test.py create mode 100644 lab-p10/small_data.zip diff --git a/lab-p10/README.md b/lab-p10/README.md new file mode 100644 index 0000000..5632229 --- /dev/null +++ b/lab-p10/README.md @@ -0,0 +1,69 @@ +# Lab-P10: Files and Namedtuples + +In this lab, you'll get practice with files and namedtuples, in preparation for p10. + +----------------------------- +## Corrections/Clarifications + + +**Find any issues?** Please report to us: + +- Jane Zhang <zhang2752@wisc.edu> +- Abinayaa S Kanimozhi Chandrasekar <kanimozhicha@wisc.edu> + +------------------------------ +## Learning Objectives + +In this lab, you will practice... +* Loading data in json files +* Loading data in csv files +* Using try/except to handle malformed data + +------------------------------ + +## Note on Academic Misconduct + +You may do these lab exercises only with your project partner; you are not allowed to start +working on lab-p10 with one person, then do the project with a different partner. Now may be a +good time to review [our course policies](https://cs220.cs.wisc.edu/s23/syllabus.html). + +**Important:** p10 and p11 are two parts of the same data analysis. +You **cannot** switch project partners between these two projects. +If you partner up with someone for p10, you have to sustain that partnership until end of p11. +**You must acknowledge that you have read this to your lab TA**. 
+ +------------------------------ + +## Segment 1: Setup + +Create a `lab-p10` directory and download the following files into the `lab-p10` directory. + +* `small_data.zip` +* `practice.ipynb` +* `practice_test.py` + +After downloading data.zip, make sure to extract it (using [Mac directions](http://osxdaily.com/2017/11/05/how-open-zip-file-mac/) or [Windows directions](https://support.microsoft.com/en-us/help/4028088/windows-zip-and-unzip-files)). After extracting, you should see a folder called `small_data`, which has the following files in it: + +* mapping_1.json +* mapping_2.json +* mapping_3.json +* stars_1.csv +* stars_2.csv +* stars_3.csv +* planets_1.csv +* planets_2.csv +* planets_3.csv + +You may delete `small_data.zip` after extracting these files from it. + + +## Segment 2: +For the remaining segments, detailed instructions are provided in `practice.ipynb`. From the terminal, open a `jupyter notebook` session, open your `practice.ipynb`, and follow the instructions in `practice.ipynb`. + +## Project 10 + +You can now get started with [p10](https://git.doit.wisc.edu/cdis/cs/courses/cs220/cs220-s23-projects/-/tree/main/p10). **You may copy/paste any code created here in project p10**. Have fun! + +```python + +``` diff --git a/lab-p10/images/files.jpg b/lab-p10/images/files.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fe0f2eee1a4c3c9e4ecf0860a14670ef122deb35 GIT binary patch literal 7282 zcmeI0XEdDMyYO#8M3Cqu5)vgujWP&A5G|8LB+&^`qD&aQL??*Rc@PAN=nTQ=V-PLM zglH3l!35D~)WMiJo^#InpYy(Ft+W2;{qTM|*S&wA?!EWAe*3=nwbtI0pOkrk`GJnU z4nRdk1=yTD0A&Wy0;tcO`@5Y5%~_+pKub$QLwk{q?)(MDi;Rp67a16sF0nE*U1GV! 
zz`)GO%)-Xb!NI|JnTwl~otu@NgZ=MGsHo4z(9qJ;($cdtF)*?JkAu<yuv`FMpL3+9 z5&+JzP*Jl`QMv(s0HC5h%l7Yt|2?SAo#l9*?gITqhO-X!%)mJ+YU*<|)PHAv);sv@ zJwU@k%X&jb<2;+;OF97$cG>WhPZtC=YdSfe{=f^#*?LCMU*zQC=D8w#Q$+NZm^?^9 zQR$Ad)_rXqT|NB=&z>6@o0yuJ+u1w3a&&Tb@$&ZZ_45yS6ZtkOI_6z0EH&+~^o-2+ zS=j}j3yX?NO3TV?>k#z~jZMuhU%R?{di%ch4~&kDPfTKdPE9W?;(jkJul!kEBkb(% z?H?QxkB<NLq5`P@P3s?K|Cblbnb$cQ8fqH4zrCo=`JF+{LPL8)<~*y0A>B(4HUZi2 z3+$RHpK3bk1?8UNIcz<DT;vpzUl1nzt@f{G|D0IF|C8B2#Qwu;8epWRI{SFkEPy&d zq5yDfdZOD(?+dXX2!>lXV7o?b(Y^Fi^emOB-%5+#(L@b0&a)-<)>bqTxhQ~e>%#VF zym=`3uRcp+Y>T&}(pFk%xch@=`uUoTB-X>lMG7zk{@PCDSEm5n)igvLUcFnrjsnDg z!ee4zeaU~4ftEHxteWeQ8XTX<4wT%{ni*~PvQKT7paAnF*;dmew;v$isOVgu&+tmQ zyd)txJqJVV((bd}>X?DL50AxpuW-D|_WRQ!o`(`-c$_Pu1W!s5t^SOnAqnEU70vlR zE#5Vg2IWG!vga!77LL5bJzQSGMt79v^rsCnUJ6|nD2)wcDVwXj2j`D!ul*hwD$+Fz z?auD`Q#W2ed$Z37zc<-X;juIam2ow#9$MwyFhrToyQanpUQQ_4Q!^RvUp;KVLEHFv zQw^7Qpug)+4c?1Zhv2t2u%C&CWH#|O-frdS9J>;5kbEo%!l2@`jjnGs5V{9Cdf$R` zX4dsU$iF`Ev+-wg;JIai7TF2BG9V>+q=4GwHq_?0Ps?t2?|%fxo^DJC{n{J#jg!ST z+r!Sr#>462r1RCz6$-h6eGYDo?olk@Mz<pY*>6O3371^i4jXg$cC@P9LK3x8hBD)u z(g;x>i#7`_M7y|`4C`R~CX3pSa~xV>w{6q-{g-CumfAD-|Lobex1vwIDZt&!oCR%> z-R8Y%6`^1KaPJX!Ww8cr%~o7mHQ);iQJz!*8KM(Ulsh6lo?Hk_x1s7)VKAy~s;^hO zK0;E>1N}g3T9_*PB&NGE`Vxc@p$^eo+#^D%g0T*_L>;N_rQH#`NnTI;Q$=1sJ-{4K zwv-<Fo~o^{t_5MWgKreow^hTCii`An(u`C7eoseV`xvmH13u4ykWNAwcLXC8GzC(9 z-qsdgtTKzWDle?&uq(CDz~{ekwbT9a8j(}_@CDy9y9?tbEhkUq9*WGYAoQ;cysbO3 zUjQU7VWfRmx2jOiGlVI>;70b~b<iXh*1mwL-3k!qSmnQDVSpt15pIs+?vGVTOYXhf zwwbt@khjKp;`1T!Nyg8}OQiA`m3NYju+`1i@=07lB4h3KP^3Bp3Mn(~^s00^R_^mE zus}Q$j4Lw5Wo#BYB!oW`i{uSjGri^c!+MP1OKxANk~BIErO99OkK$M-vz@5b;Gx}< zaJrcKfV~q9RZidIAosP5RyUlqpX8uUl7s^fZfDf&!dziN)Gx6hbEE-LJ4oH3Vzoso zA1Bn@l)a4A{StwOOHA1^Yq)<YFZ)Y&pj*xU$vv;Mu)KWC{;9}>Da5)DPevWIsN|Vw zvp>0B022lNLEE~O#UuWCk?=gs6MS@*PpNaGr{%P@f3OrtWrb_^qyHAF@8*zLdHm3O z`n_RrRS^B7fXuF&yWqz0PlV`trmc3qe5h^vtA^T!tQv0n1a_($rh}C>jS?R-F6sBw zPg8(MeX<|z=xJzw`^ZrQ?7-l1kQ9D0<wTPx-lf>``-LF>;B&C!@}T1ZBpNDv4|Ts_ 
zZcT4IgV(}-Nb~k<wjJZdBK+_@Lf<=wN$H>m)7Ki>ss}&)Niik5T(EN%G>w!W>5?r% z+MrpN)j>JSw#{o$2qciqB#n*<+-qFSA*HE`!47ln7Su95=CSk!<yN{2CyzulP_2{F zdnX*tO-()%#SQn$T4cmCoW{9)s7$H>zPPhuMvhv92UH6XXk%hjK_!2(*+X4T;gZ=~ zfpNFVFTpuE-n4H$Rmgah!9@xn!%#GdX<VG308|^(!Rqs&S-Zj<f7H~74#h;SuW!zO z4AMR=D_=u<_Cp+9;4WXfyg<##mPhSsIA({hHqmzksr~YwwsjUGUTU*HV>Z-g2mU)u zwDGuZ<GIVkn1$ds1mTQf@PsS0SPzhu)s}6gr3rh?juT=$<oJ%!z;A==F>2TS9Ur6O zhHH@Ho31Ny9##Zod1W&VOR~ZB<+?K3r<Yi#DlSR(6es;CrT`JeLCS7<Tm;?I;|2<V zo7gGYOgMQ!3_O4};S00R=axI;^iQB4Ktc(A0mHmfvx&0`$zF-r7;FoM#3nNTLD_13 z^_Z_haz}G{PI01edWTj;qyH*fB5vI}8(W{$*(x2L&s~jFEYbCY+K=c4aKLv3!EUvj z-3}?g6abp5D>RNDFyT?(-2ERAy2sk(p%BzspX);h^JMSY%I~3F>Ik|-68}P}_cd|T z2qradDc6^>s||Gqj9gT2@qMZ(@@2f+wuYusix#}iWy`0ZL$0GsoHIgal6$j*d(y|{ zw2G&7-AAXYW=t|1-s#Kfw2Ni&G~tQ@;2e9Ie(lyfGh0d$aOk=fd3Ov>0i31zr)Jie zXIzwB5q`CndW%eS21)&jEE0-*8qAmLVp;BaaCXe<38lTT(|)b~)(7I<tmymsv6BR@ zeP^HN>7>3*8Z?6-R0)@wU+d7}P8SZFrf0LSCnLl*S5F}nfTphiR$t^?2T@LkC$!G= z-5rW!1x*b363#uQ3b1#$Z*BYv@Dq3sLp)oK?-K9r0Sj^nbKk~|BwjdV9{@z*p$z^8 zMug(()6CE2r?n^F@>T|RS=DE|*wEq*@2=|dHpr#LGp5qo8~&y`+xY(s=U1YF>`MK0 z0;Y$wK=qiv8htZmVqnWk%la9Le>%avS<}yxW%8UGP_mN}NbP`Xtv7=eR&n8=e(hJ# zyVq_iT@N#}578C8!+rgVeE#`$<&~KIHqYLkl~He_sCtHv0~J8}f$oQ7{Z}|@k%%|v zAYd(`bv5KbA@d6baF{Zm6H2w{uYeev`)3&*$XFvDW>M8Dtr^uEKIS_PHQzpr?)64V zqg>TB>JnZy#%VidU<Z>_iUJf7RlGOeZzjC4d+j1Ko|p85xU{$lHjgt&wGv=@B%JEl zQimt!zdk&#?U3#$(}8-U^olXn&G?t+C5YT}qQ#;Wf46y6l*dap7jvuX87S-A4m}Qb zt-KRC?wOGM2QU_@qXlYuUbGO!7f&qEvNIbBX$=Ltx{Q-8MTL05?(k(Zhs3AJ??<9N z#BUk>p7+v)t{>0fxkA^Eu>=Wi0<Kp~Is=k$lg!(1n*k2i)%emH#c23`kwsU6F-7c# zSOvEcFn_V6JjjN?SNE|v=}orKfnkTM?JN92o=CL7#);qujpgf%8#<HR{k>v=*5PCs zB*snNe?tQEB(DEw>e{3SeE37_C(iGqQB6{X9L$-LDT(#j?To5Ab6iaXpf2pEg-DL7 z?J>0<yR8P-`-4`}Qc?f|viMgD8eCt0>%}p5i}gF{J;<Y{)89jHGCloiZdKI-mAxV1 z8P!jdlqdRK=kj3U+sKKTASfXr>A>;l_Qu}9*p@&`?1py**f+x|oTXny=-w~c=+``c zAL{rDR}{P28H4l}(U-POMmGeTY8!UFP+QzZfA#QZiAIaK@3L$1ymjf(pqXa@SCXU& ziZLWf$fEe>U4hXm8euE9PpPKe<!A=*#pcu-0u{1699mTA-%2Do@-s=7`(8@=?ot2| 
z^26-0#o<`QDU*DTMUFVc;R0Mj*{a-fAntmKUB<ljazD4jvzlMk1IU*;Hwca0Xy_sv z52$eTif>jG-(uhW+S+V0UX^V9>rp&A`sy;OyOXJxqt%3(@qxrV@e5cz)1e*|=5qe; z*!wy!q3viH`b)#NZ=$DozR3@Bk?vrS3`~C=BRC7D359xY641?UTWLP5tSnw{t*SLs zwEOli**omulMZC!E12lVbN>mZN8s-1jGE#<$HBA1xz2@}cr)Fl@*}-bM5XL`3#kpy zDOFrCt@jEpGx1U#XE|ung1CQY-$QIhv$;7Nl=w=Fjj`%ox!v_oJiGySDi*fihKLgG zyX)IIx+Fg;QLAj^YDtF|3QR~bwd##N5-2C{v3D^WJh|;<L;~acy62c9pX3;iF(Hh( zQhr*t3||cwPL-1g^++@t;WXjV0p_#a)<f5=LiziK6v@c;LA+r8B453990JRB-}_YE zbgT#lah6AmO@0?jRh&!s-NoA5t>{^z1pGGIW+K@T9N)USx$dDKaO1L9t7G!*pzf20 zG&aN3`n=piy=wlj#sMVhGb0AYLTv4NajgXdx^``T6b_Bf@o=xx6-o27H4ADqvK{bk zSrMd49E+wg+LWGmjQ25JNs*|Oyb;Zyl9^*TDN%`=^5aRJxh(&!D3?9?;}-XCYHdX0 zK0F`y`6S{r6fRGahISMckX{h1V<tN#L893V;<pfg;9Hd+o1@@G>BXtH1XFk%o(XcD z&c9}siDz_D>}%&8EO+5LR4#&(vZLDC;1g}+OlL2MygP7x7Ao|J^EdUy89&CnfID43 z$XAImmV`nSYr|M~$n~g|L<?~9Vgn1e(VW$`pqQvpLceg>VkHImT7kh@HjYZp=(dl$ zNx5z;#28>+yCJ6>ZEMQkFWdZzXT8KG=wTUO4I4&b6)1pWXm00f;Mr+y{uCRYzId72 z1tr=zwz<!POT`-K+aaQP{Xgm%>*qNdnNHl}{q0Go@qrNldds9-f7dl-&}Z7$+|jj+ zjHaXU*ftzBo@JUlj$$`_sPSKFKZoPPF*lzCo$20Db%nJ>ZJ4|SJEsV>P%>~RL^VD_ z>muHgHs2V#xqo3}#r5J(H1wcJs!CP2L>J=0cc5w81PNz<D_fhcBdmAlT<KhpI02ND zAQdDc94`xLz~3FpI5_<BFhV};mmIy}kR!{vpNs}>NJ9!hhsSQM(TQmL9*cA)aS|iA zg&S(?BJLm#CbV3xz>D64wNvbN3wMhQB+X_uGkZx(o>ZXk#Lr)k^CNxA_HtkRvar@< zffbgC$+pDk>S`KIq^C#Tx*l=iW&*;deuC$@p*FxV9wgz~&W5}m42qMZ04bx*Z|aKH zOq3OhoJ-C6f6q=2rFGpLnXxjJ{lKizNnOoOb;|pV0>}kJ2!rH{K{M?xXssFjT7Kd} zwGwp30+;4P%&H1uEu{cc5HvD?z}B754#L5S6(&s>kR9t+sMHsGd&^ClFuC}{&G6~> zNkt^dBL6^rq6nT$0Zhh%F4M(SBSCQeDog7=dr(DL#^S!D+VC=qyQOk(Io`WhScT(W zoW^+2jZ%M%cJu_2>sm(KJY2zlLp7Im+$D3T)1?s%TG~gB#6HRN6fz0wU4vC_P3&H( zUPPO`7?pn{b-X@Y^TQ@}`m3}gSAW7%K$a&UnI+^q$QsahkJjGED};ddgHnJ8^%MXv ziOeh&F4>=x-fJ8>uI^*@H|HR}4dD*`N`0`f2*G)Svb_7bPDFc)XHJx`AsbcO2*(W^ znr;A~ofg~fhH^UbDb%%1fCj7SX3ZPBtkfjsHdvfftaM8SSNlD<6Pbk)jn4VpcUSo6 z!A;+S(k-Y<YWd5rzHuimX+8Am_O@53HEZT76zr?kWQ6w&@)<c_h@whcP|xxXQfdi0 zw)D*3Gd^J+FQgY9$d?<~W!h1un{9q@o+Gl<iOf5xrsi*AJq}mpSxC4zZ2ZG@-%60o 
z$|P<GJGdj_A=xV-kT{}K(-Zq#fR-`7<LE7B2w^1R$Tf3}X$@4*$h;5o@AosN0MVyv zZ`Oj-AVowweWK$6@5!uUhr7S|$$d=W#FkV+Eg932;<lYSxY7CT!xx`e!_8wm)?C;} z+dm$}i+zSfkHx`t=}Se8NuUH_=}PI>FTqw~bKmVB>Ek<euX;T!p8}|O(v@GK94C-w z<?AhvV~VY<0uGcVSS?JomkN$#kkxWh_h{>c5~pkpl>Pu^NGqJHb)VuS#vD1KVm<?h zBNZ}|r!{CE=+j@lU7!=`C3me{c4){`Us!iGQ+Atp6N@W=)MDG7pDrE0pa6l5OKlW@ zdM!U%O$i?YU%bo!F{zjjrh8sLjx;Pfa_@63wzwPVY+7fLFZ#(_R*z6w2}bG7q0#=; zo3uCcu=p53>FAg^lZT<3lX%!v+~wQ9z`q%%^6Rqht0;0eEomG@nxy=AiVLw858)!s zwB8!nZ1p2qQ2?gQ<bU=egI}8{@*MxT4WDU@4@sz4JIJ10PNNZd)HG(p5L(tB_*y^F zpnfZv>0MZWO{L3rgx~qak`_F<nGk1Cm$&?rsMSRX&c4}{tb6SjX9^;oYS<!{<s0xD zLA(|f@Q8hDtFSF3n19eZBSPP^seK)CdZ1hClAS&=9O>BF-W=~A)7MhgbQZ3#jFbV- zp>BMtr5vk)LGAwfi}(hGIfhgNIt!OMK7-4HFUx;Ux%4lpFchQ4MZ&J<8M31foC=xy zuEZ*opI7;n+KX~7C>~7jdA)aJj?KhXXkoIz<(QQ~3ed4Pa%|kETLVVZnbu6IDpnAa zI$M0wquSDk-)+3!oFmGQWewFms8D^vnIOZ)yrJX@^Ydi$e}(&M&FG^t$OP^>&VyH* zHy*TMNJ0(gE%Z2X9u2UckCyud5i+fMRJ5jINyxrlrxNKm+ih5E#gXFfY9LMUjMOAa z;i#IZ(7fG#aX&%qE?tLm_i=BI1KnF_J#X&Q(d8|rDj|JN#khf}cL7614^870W<R1P zu47E6i_;Y{OA3{%BZXnFei#oNnvDzowKmLXapmsQUw52tM44-(T{Zg|9^VH^N-zDi z1|Ku%Fg`Vz3#N4}bAIaY+h?u3oW@ib!kFKcWQ%zxrM;I5zEU!^yZpMlLOrK(utU(7 z09y``&~hjVQpJCNk1kj}E~?)g%N6O$Mw+K*C47F%TjHH+oBFLEQSrb(o+DA_5Sa)Q z-c~rF06Ce<*g>?V$Dud5!H|@CbmYW-R@(nF!5s5g4uA9E4jB!&mGCd>4lV^RQ?oTg z^Iyn5D`#U4U;T6ab*!$U-PKBcAl(H_FE5kdV*TE9PkMSxRPm_s1^L$Xk1yk$56BJ? 
z$auQ<C!07;cg3SBfVbuao%Zv>Umsf5cU=3i6JAbN%(0k9>NJ6U(PS~RDbnpPN!G1N z&*RJq3MCYPo;J(}5-aH#F_iVy@kYjdSwVNjp2RGU)#Kdl7!-QZmF+PKZ;(J9c3d#E zv2wHm^Qxpk9c95{GR<Kbn$OuKJlYuxF@5ZYfv&~aBT4vaz_IBhTCI#MwM|O$$G5`} zyz0}D-O3A-c9RHwE;H|OP;5|98`i;{!zy#&WTR7O<n0!fW-6`O+0g__|EOHMV%8aT z4WB#2CyC!k*1o!>n&X|}<soo)s8)~d-B3)(kEriT)%m4mF---nLa3EcBGt3Q4|9Gb zgCZaC;q!`}7U4yIra5cMEVWCoJzm3Ej7dUWx1zcbPlcX!xD^@lr#`oM$U=4y&z(WJ z6u{DX)z^<HK$v9_I@FkqFMx@?yC_-^_J1&qw~qNlaEz1B<y(1H|HHLfij~KQVTx19 zbtFG)Dl6TeXRU)RJjE38%>3^CgOP(aYlAxIyB92D`}rFd{B(aNQ*$7uL0XPw0m+*7 z%_$<;b2^3+JDg8$H%pa&4mUe1e<UAPl4n}a3!&`qb{?)3mb0Nw;H56rS~JU#&BTSn z*@&=9kLT%>s)Y*PPLo2fVV4e>rdC&p{078VC%O}=7Pxli&T=Q^{gOCU#T$rGl0jvU zjok;UY0laYu^(lQTw(^rzE!1^S-wmOO2eV|qdyUV{t~Q~f67m@P6^*M`b}41Jr=&{ zIgPF^9xGT!gXT(LelBtgmLu&Wm5&b{#ta7{4%@AX{7b8-9Mb~Wt#_gYFyMdr-!=IE J^$$>{{spv5nnC~o literal 0 HcmV?d00001 diff --git a/lab-p10/practice.ipynb b/lab-p10/practice.ipynb new file mode 100644 index 0000000..606dac0 --- /dev/null +++ b/lab-p10/practice.ipynb @@ -0,0 +1,3009 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "7cb1e571", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "# Initialize Otter\n", + "import otter\n", + "grader = otter.Notebook(\"practice.ipynb\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c49b2a7", + "metadata": {}, + "outputs": [], + "source": [ + "import practice_test" + ] + }, + { + "cell_type": "markdown", + "id": "808c1ea8", + "metadata": {}, + "source": [ + "# Lab-P10: File Handling and Namedtuples" + ] + }, + { + "cell_type": "markdown", + "id": "ee613db0", + "metadata": {}, + "source": [ + "## Learning Objectives:\n", + "\n", + "In this lab, you will practice how to...\n", + "* use the `os` module to handle files,\n", + "* load data in json files,\n", + "* combine data from different files to create data structures,\n", + "* create named tuples,\n", + "* use `try/except` to handle malformed data." 
+ ] + }, + { + "cell_type": "markdown", + "id": "0b2e0fb4", + "metadata": {}, + "source": [ + "## Note on Academic Misconduct:\n", + "\n", + "**IMPORTANT**: P10 and P11 are two parts of the same data analysis. You **cannot** switch project partners between these two projects. That is, if you partner up with someone for Lab-P10 and P10, you have to work on Lab-P11 and P11 with the **same partner**.\n", + "\n", + "You may do these lab exercises only with your project partner; you are not allowed to start working on Lab-P10 with one person, then do the project with a different partner. Now may be a good time to review [our course policies](https://cs220.cs.wisc.edu/s23/syllabus.html)." + ] + }, + { + "cell_type": "markdown", + "id": "3979d34c", + "metadata": {}, + "source": [ + "## Setup:\n", + "\n", + "Before proceeding much further, download `small_data.zip` and extract it to a directory on your\n", + "computer (using [Mac directions](http://osxdaily.com/2017/11/05/how-open-zip-file-mac/) or\n", + "[Windows directions](https://support.microsoft.com/en-us/help/4028088/windows-zip-and-unzip-files)).\n", + "\n", + "You need to make sure that the project files are stored in the following structure:\n", + "\n", + "```\n", + "+-- practice.ipynb\n", + "+-- practice_test.py\n", + "+-- small_data\n", + "| +-- .DS_Store\n", + "| +-- .ipynb_checkpoints\n", + "| +-- mapping_1.json\n", + "| +-- mapping_2.json\n", + "| +-- mapping_3.json\n", + "| +-- planets_1.csv\n", + "| +-- planets_2.csv\n", + "| +-- planets_3.csv\n", + "| +-- stars_1.csv\n", + "| +-- stars_2.csv\n", + "| +-- stars_3.csv\n", + "```\n", + "\n", + "Make sure that the files inside `small_data.zip` are inside the `small_data` directory." 
+ ] + }, + { + "cell_type": "markdown", + "id": "3e29243c", + "metadata": {}, + "source": [ + "## Introduction:\n", + "\n", + "In P10 and P11, we will be studying stars and planets outside our Solar System using this dataset from the [NASA Exoplanet Archive](https://exoplanetarchive.ipac.caltech.edu/cgi-bin/TblView/nph-tblView?app=ExoTbls&config=PSCompPars). We will use Python to ask some interesting questions about the laws of the universe and explore the habitability of other planets in our universe.\n", + "\n", + "In Lab-P10, you will work with a small subset of the full dataset. You can find these files inside `small_data.zip`. The full dataset used in P10 and P11 is stored in the same format, so you can then use this code to parse the dataset in P10 and P11." + ] + }, + { + "cell_type": "markdown", + "id": "8089eb9e", + "metadata": {}, + "source": [ + "## The Data:\n", + "\n", + "You can open each of the files inside the `small_data` directory using Microsoft Excel or some other Spreadsheet viewing software to see how the data is stored. For example, these are the contents of the file `stars_1.csv`:\n", + "\n", + "|Name|Spectral Type|Stellar Effective Temperature [K]|Stellar Radius [Solar Radius]|Stellar Mass [Solar mass]|Stellar Luminosity [log(Solar)]|Stellar Surface Gravity [log10(cm/s**2)]|Stellar Age [Gyr]|\n", + "|----|-------------|---------------------------------|-----------------------------|-------------------------|-------------------------------|----------------------------------------|-----------------|\n", + "|55 Cnc|G8V|5172.00|0.94|0.91|-0.197|4.43|10.200|\n", + "|DMPP-1|F8 V|6196.00|1.26|1.21|0.320|4.41|2.010|\n", + "|GJ 876|M2.5V|3271.00|0.30|0.32|-1.907|4.87|1.000|\n", + "\n", + "As you might have already guessed, this file contains data on a number of *stars* outside our solar system along with some important statistics about these stars. 
The columns here are as follows:\n", + "\n", + "- `Name`: The name given to the star by the International Astronomical Union,\n", + "- `Spectral Type`: The Spectral Classification of the star as per the Morgan–Keenan (MK) system,\n", + "- `Stellar Effective Temperature [K]`: The temperature of a black body (in units of Kelvin) that would emit the observed radiation of the star,\n", + "- `Stellar Radius [Solar Radius]`: The radius of the star (in units of the radius of the Sun),\n", + "- `Stellar Mass [Solar mass]`: The mass of the star (in units of the mass of the Sun),\n", + "- `Stellar Luminosity [log(Solar)]`: The total amount of energy radiated by the star each second (represented by the logarithm of the ratio of this energy to the energy radiated by the Sun each second),\n", + "- `Stellar Surface Gravity [log10(cm/s**2)]`: The acceleration due to the gravity of the star at its surface (represented by the logarithm of the acceleration measured in centimeters per second squared),\n", + "- `Stellar Age [Gyr]`: The total age of the star (in units of Giga years, i.e., billions of years).\n", + "\n", + "The two other files `stars_2.csv` and `stars_3.csv` also store similar data in the same format. 
At this stage, it is alright if you do not understand what these columns mean - they will be explained to you when they become necessary (in P10 and P11).\n", + "\n", + "On the other hand, here are the contents of the file `planets_1.csv`:\n", + "\n", + "|Planet Name|Discovery Method|Discovery Year|Controversial Flag|Orbital Period [days]|Planet Radius [Earth Radius]|Planet Mass [Earth Mass]|Orbit Semi-Major Axis [au]|Eccentricity|Equilibrium Temperature [K]|Insolation Flux [Earth Flux]|\n", + "|-----------|----------------|--------------|------------------|---------------------|----------------------------|------------------------|---------------------------|------------|---------------------------|----------------------------|\n", + "|55 Cnc b|Radial Velocity|1996|0|14.65160000|13.900|263.97850|0.113400|0.000000|700||\n", + "|55 Cnc c|Radial Velocity|2004|0|44.39890000|8.510|54.47380|0.237300|0.030000|||\n", + "|DMPP-1 b|Radial Velocity|2019|0|18.57000000|5.290|24.27000|0.146200|0.083000|877||\n", + "|GJ 876 b|Radial Velocity|1998|0|61.11660000|13.300|723.22350|0.208317|0.032400|||\n", + "|GJ 876 c|Radial Velocity|2000|0|30.08810000|14.000|226.98460|0.129590|0.255910|||\n", + "\n", + "\n", + "This file contains data on a number of *planets* outside our solar system along with some important statistics about these planets. 
The columns here are as follows:\n", + "\n", + "- `Planet Name`: The name given to the planet by the International Astronomical Union,\n", + "- `Discovery Method`: The method by which the planet was discovered,\n", + "- `Discovery Year`: The year in which the planet was discovered,\n", + "- `Controversial Flag`: Indicates whether the status of the discovered object as a planet was disputed at the time of discovery, \n", + "- `Orbital Period [days]`: The amount of time (in units of days) it takes for the planet to complete one orbit around its star,\n", + "- `Planet Radius [Earth Radius]`: The radius of the planet (in units of the radius of the Earth),\n", + "- `Planet Mass [Earth Mass]`: The mass of the planet (in units of the mass of the Earth),\n", + "- `Orbit Semi-Major Axis [au]`: The semi-major axis of the planet's elliptical orbit around its host star (in units of Astronomical Units),\n", + "- `Eccentricity`: The eccentricity of the planet's orbit around its host star,\n", + "- `Equilibrium Temperature [K]`: The temperature of the planet (in units of Kelvin) if it were a black body heated only by its host star,\n", + "- `Insolation Flux [Earth Flux]`: The amount of radiation the planet received from its host star per unit of area (in units of the Insolation Flux of the Earth from the Sun).\n", + "\n", + "The two other files `planets_2.csv`, and `planets_3.csv` also store similar data in the same format.\n", + "\n", + "\n", + "Finally, if you take a look at `mapping_1.json` (you can open json files using any Text Editor), you will see that the file looks like this:\n", + "\n", + "```\n", + "{\"55 Cnc b\": \"55 Cnc\", \"55 Cnc c\": \"55 Cnc\", \"DMPP-1 b\": \"DMPP-1\", \"GJ 876 b\": \"GJ 876\", \"GJ 876 c\": \"GJ 876\"}\n", + "```\n", + "\n", + "This file contains a *mapping* from each *planet* in `planets_1.csv` to the *star* in `stars_1.csv` that the planet orbits. 
Similarly, `mapping_2.json` contains a *mapping* from each *planet* in `planets_2.csv` to the *star* in `stars_2.csv` that the planet orbits, and `mapping_3.json` contains a *mapping* from each *planet* in `planets_3.csv` to the *star* in `stars_3.csv` that the planet orbits." + ] + }, + { + "cell_type": "markdown", + "id": "315474e8", + "metadata": {}, + "source": [ + "## Questions and Functions:\n", + "\n", + "Let us start by importing all the modules we will need for this project." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27fac496", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# it is considered a good coding practice to place all import statements at the top of the notebook\n", + "# place all your import statements in this cell if you need to import any more modules for this project\n", + "\n", + "# we have imported these modules for you\n", + "import os\n", + "from collections import namedtuple\n", + "import csv\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "id": "09dd2aa8", + "metadata": {}, + "source": [ + "## Segment 2: File handling with the `os` module\n", + "\n", + "In this segment, you will learn how to use the `os` module effectively." + ] + }, + { + "cell_type": "markdown", + "id": "d9ce1f1d", + "metadata": {}, + "source": [ + "**Question 1.1**: List **all** the files and directories in the directory `small_data` using the `os.listdir` function.\n", + "\n", + "Your output **must** be a **list** of **strings**. The order does **not** matter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37d1f005", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "all_files = os.listdir('small_data')\n", + "\n", + "all_files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdd54231", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q1-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd5fa410", + "metadata": {}, + "source": [ + "**Important Warning:** That appeared to work just fine, but you should be **very careful** when using the `os` module. You might have noticed that there are files and directories in the list returned by `os.listdir` that **begin** with the character `\".\"` (specifically in this case, the file `\".DS_Store\"` and the directory `\".ipynb_checkpoints\"`). Such files and directories are used by some operating systems to store metadata. These files are not actually a part of your dataset, and must be **ignored**. \n", + "\n", + "When you are processing the files in any directory, you **must** always **ignore** such files that begin with the character `\".\"`, as they are not actually files in the directory. You **must** do this every time you use `os.listdir`." + ] + }, + { + "cell_type": "markdown", + "id": "76ff4730", + "metadata": {}, + "source": [ + "**Question 1.2**: List **all** the files and directories in the directory `small_data` that do **not** **start with** the character`\".\"`.\n", + "\n", + "Your output **must** be a **list** of **strings**. The order does **not** matter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6200029f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'actual_files', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81413fc8", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q1-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "0baf9066", + "metadata": {}, + "source": [ + "**Important Warning:** You are not done yet. Look at the order in which the files in the **list** `actual_files` are stored. The **ordering** of the files in the **list** returned by `os.listdir` **depends on the operating system**. This means that if you run this code on a **different OS**, the files might be sorted in a **different order**. This makes `os.listdir` a little dangerous because you could index it, and it will always work the same way on your computer, but will **behave differently on another computer**. To avoid these issues, you should make sure that you always **sort** the output of `os.listdir` before you use it. This will ensure that the ordering remains consistent across all operating systems.\n", + "\n", + "When you are processing the files in any directory, you **must** always **sort** the output of `os.listdir` first. You **must** do this every time you use `os.listdir`." + ] + }, + { + "cell_type": "markdown", + "id": "a7dee7c7", + "metadata": {}, + "source": [ + "**Question 2**: List **all** the files and directories in the directory `small_data` that do **not** **start with** the character`\".\"`, sorted in **reverse alphabetical order**.\n", + "\n", + "Your output **must** be a **list** of **strings**, sorted in **reverse alphabetical** order." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8396572e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'files_in_small_data', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35670603", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q2\")" + ] + }, + { + "cell_type": "markdown", + "id": "e38c5599", + "metadata": {}, + "source": [ + "**Important Warning:** Every time you use `os.listdir`, you **must** **ignore** files and directories that start with `\".\"`, and also **sort** the **list** returned by the function, before you do anything else. Otherwise, you are likely to write code that **works on your computer**, but **crashes on other computers**. Such errors are hard to debug, and you **must** be very careful." + ] + }, + { + "cell_type": "markdown", + "id": "a1068d6e", + "metadata": {}, + "source": [ + "**Question 3.1**: What is the **path** of the file `stars_1.csv` in the directory `small_data`.\n", + "\n", + "You are **allowed** to 'hardcode' the strings `'small_data'` and `'stars_1.csv'` to answer this question.\n", + "\n", + "**Warnings:**\n", + "\n", + "1. You **must not** hardcode the **absolute path** of any file in your code. For instance, the **absolute path** of this file `stars_1.csv` could be: `C:\\Users\\ms\\cs220\\lab-p10\\small_data\\stars_1.csv`. However, if you hardcode this path in your code, it will **only work on your computer**. In this case, since the notebook `practice.ipynb` is stored in the path `C:\\Users\\ms\\cs220\\lab-p10`, the **relative path** of the file is `small_data\\stars_1.csv`, and this is the path that **must** be used, if you want your code to work on all computers.\n", + "2. You **must not** hardcode either the character `\"\\\"` or the character `\"/\"` in your paths. 
If you do so, your code will **crash** when it runs on a **different operating system**. You **must** use the `os.path.join` function to create paths." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07c47fa4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "stars_1_path = os.path.join(\"small_data\", \"stars_1.csv\")\n", + "\n", + "stars_1_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f461a00b", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q3-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "8fc50972", + "metadata": {}, + "source": [ + "**Question 3.2**: List the **paths** of **all** the files in the directory `small_data`.\n", + "\n", + "Your output **must** be a **list** of **strings**. You must **ignore** files that **start with** the character`\".\"`, and your output **must** be sorted in **reverse alphabetical order**.\n", + "\n", + "You are **allowed** to \"hardcode\" the name of the directory `small_data` to answer this question.\n", + "\n", + "**Warnings:**\n", + "\n", + "1. You **must not** hardcode the **absolute path** of any file in your code. You must use the **relative path** of the files.\n", + "2. You **must not** hardcode either the character `\"\\\"` or the character `\"/\"` in your paths. You **must** use the `os.path.join` function to create paths." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba9aa18a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'paths_in_small_data', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d37f8b04", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q3-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "bc4582af", + "metadata": {}, + "source": [ + "**Question 4.1**: List the **paths** of **all** the JSON files in the directory `small_data`.\n", + "\n", + "Your output **must** be a **list** of **strings**. You must **ignore** files that **start with** the character`\".\"`, and your output **must** be sorted in **reverse alphabetical order**.\n", + "\n", + "**Hint:** You can identify the JSON files as the files which end with the string `\".json\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2078e3cb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'json_paths', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6050a736", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q4-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "15892c6d", + "metadata": {}, + "source": [ + "**Question 4.2**: List the **paths** of **all** the files in the directory `small_data`, whose filename starts with `\"stars\"`.\n", + "\n", + "Your output **must** be a **list** of **strings**. You must **ignore** files that **start with** the character`\".\"`, and your output **must** be sorted in **reverse alphabetical order**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4219e195", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'stars_paths', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "225d3b31", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q4-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "944a550e", + "metadata": {}, + "source": [ + "## Segment 3: Creating Namedtuples\n", + "\n", + "In P10, you will be reading the data in files similar to `stars_1.csv`, `stars_2.csv`, and `stars_3.csv`, and storing the data as a **dictionary** of **named tuples**. Now would be a great time to practice creating similar data structures." + ] + }, + { + "cell_type": "markdown", + "id": "c2359d2d", + "metadata": {}, + "source": [ + "### Data Structure 1: namedtuple `Star`\n", + "\n", + "We will now create a new `Star` type (using namedtuple). It **must** have the following attributes:\n", + "\n", + "* `spectral_type`,\n", + "* `stellar_effective_temperature`,\n", + "* `stellar_radius`,\n", + "* `stellar_mass`,\n", + "* `stellar_luminosity`,\n", + "* `stellar_surface_gravity`,\n", + "* `stellar_age`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa14b675", + "metadata": {}, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "# define the list of attributes we want in our namedtuple\n", + "star_attributes = ['spectral_type',\n", + " 'stellar_effective_temperature',\n", + " 'stellar_radius',\n", + " 'stellar_mass',\n", + " 'stellar_luminosity',\n", + " 'stellar_surface_gravity',\n", + " 'stellar_age']\n", + "\n", + "# create the namedtuple type 'Star' with the correct attributes\n", + "Star = namedtuple(\"Star\", star_attributes)" + ] + }, + { + "cell_type": "markdown", + "id": "645280cf", + "metadata": {}, + "source": [ + "Let us now test whether we have defined the namedtuple properly by creating a `Star` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8f9792", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# run this following cell to initialize and test an example Star object\n", + "\n", + "sun = Star('G2 V', 5780.0, 1.0, 1.0, 0.0, 4.44, 4.6)\n", + "\n", + "sun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1211c9d", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"star_object\")" + ] + }, + { + "cell_type": "markdown", + "id": "effba07e", + "metadata": {}, + "source": [ + "### Segment 3.1: Creating `Star` objects from `stars_1.csv`\n", + "\n", + "Now that we have created the `Star` namedtuple, our next objective will be to read the files `stars_1.csv`, `stars_2.csv`, and `stars_3.csv` and create `Star` objects out of all the stars in there. In order to process the CSV files, you will first need to copy/paste the `process_csv` function you have been using since P6." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70577d4f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# copy & paste the process_csv function from previous projects here\n" + ] + }, + { + "cell_type": "markdown", + "id": "af903947", + "metadata": {}, + "source": [ + "You are now ready to read the data in `stars_1.csv` using `process_csv` and convert the data into `Star` objects. In the cell below, you **must** read the data in `stars_1.csv` and extract the **header** and the non-header **rows** of the file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc9d27a4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "stars_1_csv = process_csv(os.path.join(\"small_data\", \"stars_1.csv\")) # read the data in 'stars_1.csv'\n", + "stars_header = ...\n", + "stars_1_rows = ..." + ] + }, + { + "cell_type": "markdown", + "id": "770e991c", + "metadata": {}, + "source": [ + "If you wish to **verify** that you have read the file and defined the variables correctly, you can check that `stars_header` has the value:\n", + "\n", + "```python\n", + "['Name', 'Spectral Type', 'Stellar Effective Temperature [K]', 'Stellar Radius [Solar Radius]', \n", + " 'Stellar Mass [Solar mass]', 'Stellar Luminosity [log(Solar)]', 'Stellar Surface Gravity [log10(cm/s**2)]',\n", + " 'Stellar Age [Gyr]']\n", + "```\n", + "\n", + "and that `stars_1_rows` has the value:\n", + "\n", + "```python\n", + "[['55 Cnc', 'G8V', '5172.00', '0.94', '0.91', '-0.197', '4.43', '10.200'],\n", + " ['DMPP-1', 'F8 V', '6196.00', '1.26', '1.21', '0.320', '4.41', '2.010'],\n", + " ['GJ 876', 'M2.5V', '3271.00', '0.30', '0.32', '-1.907', '4.87', '1.000']]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "59589559", + "metadata": {}, + "source": [ + "**Question 5**: Create a `Star` object for the **first** star in `\"stars_1.csv\"`.\n", + "\n", + "The **attribute** of the `Star`
namedtuple object, the corresponding **column** of the `stars_1.csv` file where the value should be obtained from, and the correct **data type** for the value are listed in the table below:\n", + "\n", + "|Attribute of `Star` object|Column of `stars_1.csv`|Data Type|\n", + "|---------|------|---------|\n", + "|`spectral_type`|Spectral Type|**string**|\n", + "|`stellar_effective_temperature`|Stellar Effective Temperature [K]|**float**|\n", + "|`stellar_radius`|Stellar Radius [Solar Radius]|**float**|\n", + "|`stellar_mass`|Stellar Mass [Solar mass]|**float**|\n", + "|`stellar_luminosity`|Stellar Luminosity [log(Solar)]|**float**|\n", + "|`stellar_surface_gravity`|Stellar Surface Gravity [log10(cm/s**2)]|**float**|\n", + "|`stellar_age`|Stellar Age [Gyr]|**float**|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "809ef473", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "row_idx = 0 # the index of the star we want to convert into a Star object\n", + "\n", + "# extract the values from stars_1_rows\n", + "spectral_type = stars_1_rows[row_idx][stars_header.index(...)]\n", + "stellar_effective_temperature = float(stars_1_rows[row_idx][stars_header.index(...)])\n", + "stellar_radius = ...\n", + "stellar_mass = ...\n", + "stellar_luminosity = ...\n", + "stellar_surface_gravity = ...\n", + "stellar_age = ...\n", + "\n", + "# initialize 'first_star'\n", + "first_star = Star(spectral_type, stellar_effective_temperature, stellar_radius, \\\n", + " stellar_mass, stellar_luminosity, \\\n", + " stellar_surface_gravity, stellar_age)\n", + "\n", + "first_star" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47721e36", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q5\")" + ] + }, + { + "cell_type": "markdown", + "id": "8ec9a172", + "metadata": {}, + "source": [ + "**Question 6**: Create a `Star` object 
for the **second** star in `\"stars_1.csv\"`.\n", + "\n", + "You **must** create the `Star` object similarly to what you did in the previous question." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57de668f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'second_star', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53ac49fc", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q6\")" + ] + }, + { + "cell_type": "markdown", + "id": "d2934799", + "metadata": {}, + "source": [ + "**Question 7.1**: What is the `spectral_type` of the **second** star in `\"stars_1.csv\"`?\n", + "\n", + "You **must** answer this question by accessing the correct **attribute** of the `Star` object `second_star`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16028988", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "second_star_spectral_type = second_star.spectral_type\n", + "\n", + "second_star_spectral_type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f60ee11", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q7-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "22ff4de6", + "metadata": {}, + "source": [ + "**Question 7.2**: What is the `stellar_age` of the **first** star in `\"stars_1.csv\"`?\n", + "\n", + "You **must** answer this question by accessing the correct **attribute** of the `Star` object `first_star`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d237a1e0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'first_star_stellar_age', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98c3555f", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q7-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "cbb5dc03", + "metadata": {}, + "source": [ + "**Question 7.3**: What is the **ratio** of the `stellar_radius` of the **first** star in `\"stars_1.csv\"` to the **second** star in `\"stars_1.csv\"`?\n", + "\n", + "You **must** answer this question by accessing the correct **attribute** of the `Star` objects `first_star` and `second_star`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "401b5887", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'stellar_radius_ratio', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b40d09ff", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q7-3\")" + ] + }, + { + "cell_type": "markdown", + "id": "3cc4eb5b", + "metadata": {}, + "source": [ + "**Question 8**: Create a **dictionary** mapping the `name` of each star in `\"stars_1.csv\"` to its `Star` object.\n", + "\n", + "Your output **must** look like this:\n", + "```python\n", + "{'55 Cnc': Star(spectral_type='G8V', stellar_effective_temperature=5172.0, stellar_radius=0.94, \n", + " stellar_mass=0.91, stellar_luminosity=-0.197, stellar_surface_gravity=4.43, stellar_age=10.2),\n", + " 'DMPP-1': Star(spectral_type='F8 V', stellar_effective_temperature=6196.0, stellar_radius=1.26, \n", + " stellar_mass=1.21, stellar_luminosity=0.32, stellar_surface_gravity=4.41, stellar_age=2.01),\n", + " 'GJ 876': 
Star(spectral_type='M2.5V', stellar_effective_temperature=3271.0, stellar_radius=0.3, \n", + " stellar_mass=0.32, stellar_luminosity=-1.907, stellar_surface_gravity=4.87, stellar_age=1.0)}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "502c6cca", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "stars_1_dict = {} # initialize empty dictionary to store all stars\n", + "\n", + "for row_idx in range(len(stars_1_rows)):\n", + " star_name = stars_1_rows[row_idx][stars_header.index(...)]\n", + " spectral_type = ...\n", + " stellar_effective_temperature = ...\n", + " # extract the other columns from 'stars_1_rows'\n", + " \n", + " star = ... # initialize the 'Star' object using the variables defined above\n", + " stars_1_dict[...] = star\n", + "\n", + "stars_1_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd6e88ce", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "stars_1_rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "816ac420", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q8\")" + ] + }, + { + "cell_type": "markdown", + "id": "11f221a4", + "metadata": {}, + "source": [ + "**Question 9.1**: What is the `Star` object of the star (in `stars_1.csv`) named *GJ 876*?\n", + "\n", + "You **must** access the `Star` object in `stars_1_dict` **dictionary** defined above to answer this question." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5103a392", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'gj_876', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9db215ed", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q9-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "86456ed6", + "metadata": {}, + "source": [ + "**Question 9.2**: What is the `stellar_luminosity` of the star (in `stars_1.csv`) named *GJ 876*?\n", + "\n", + "You **must** access the `Star` object in `stars_1_dict` **dictionary** defined above to answer this question." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa69195b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'gj_876_luminosity', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d22185ca", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q9-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "a24daea4", + "metadata": {}, + "source": [ + "### Segment 3.2: Data Cleaning - missing data\n", + "\n", + "We have already parsed the data in `stars_1.csv`. We are now ready to parse the data in **all** the star files of the `small_data` directory. However, there is one minor inconvenience - there is some missing data in `stars_2.csv` and `stars_3.csv`. For example, this is the **first** row of `stars_2.csv`:\n", + "\n", + "```python\n", + "['HD 158259', 'G0', '5801.89', '1.21', '1.08', '0.212', '4.25', '']\n", + "```\n", + "\n", + "As you can see, the value of the last column (`Stellar Age [Gyr]`) is `''`, which means that the data is missing. 
When the data is missing, we will want the value of the corresponding attribute in the `Star` object to be `None`.\n", + "\n", + "So, for example, if we are to convert the row above to be a `Star` object, it should look like:\n", + "\n", + "```python\n", + "Star(spectral_type='G0', stellar_effective_temperature=5801.89, stellar_radius=1.21, stellar_mass=1.08,\n", + " stellar_luminosity=0.212, stellar_surface_gravity=4.25, stellar_age=None)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "16f13f76", + "metadata": {}, + "source": [ + "### Function 1: `star_cell(row_idx, col_name, stars_rows, header=stars_header)`\n", + "\n", + "Since we need to clean the values of the **list** of **lists** `stars_rows` before we can create our required data structure (**dictionary** mapping **strings** to `Star` objects), now would be a good time to create a function that takes in a `row_idx`, a `col_name` and a **list** of **lists** `stars_rows` (as well as the optional argument `header`) and returns the value of the column `col_name` at the row `row_idx`.\n", + "\n", + "This function **must** typecast the values it returns based on the `col_name`. If the value in `stars_rows` is missing (i.e., it is `''`), then the value returned **must** be `None`.\n", + "\n", + "Recall that the **column** of `stars_rows` where the value should be obtained from, and the correct **data type** for the value are listed in the table below:\n", + "\n", + "|Column of `stars_rows`|Data Type|\n", + "|------|---------|\n", + "|Name|**string**|\n", + "|Spectral Type|**string**|\n", + "|Stellar Effective Temperature [K]|**float**|\n", + "|Stellar Radius [Solar Radius]|**float**|\n", + "|Stellar Mass [Solar mass]|**float**|\n", + "|Stellar Luminosity [log(Solar)]|**float**|\n", + "|Stellar Surface Gravity [log10(cm/s**2)]|**float**|\n", + "|Stellar Age [Gyr]|**float**|\n", + "\n", + "**Hint:** You can use the `cell` function defined in P6 and P7 for inspiration here." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f89e2a16", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "# the default argument to the parameter 'header' is the global variable 'stars_header' defined above\n", + "def star_cell(row_idx, col_name, stars_rows, header=stars_header):\n", + " col_idx = header.index(...)\n", + " val = stars_rows[row_idx][col_idx]\n", + " # return None if value is missing\n", + " # else typecast 'val' and return it depending on 'col_name'" + ] + }, + { + "cell_type": "markdown", + "id": "85c16a43", + "metadata": {}, + "source": [ + "**Question 10.1**: Use the `star_cell` function to find the value of the column `\"Spectral Type\"` of the **first** star in `\"stars_2.csv\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ad3d789", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "# first read the data in 'stars_2.csv' as a list of lists\n", + "stars_2_data = process_csv(os.path.join(\"small_data\", \"stars_2.csv\"))\n", + "stars_2_rows = stars_2_data[1:]\n", + "\n", + "# use the 'star_cell' function to extract the correct value\n", + "first_star_type = star_cell(0, 'Spectral Type', stars_2_rows)\n", + "\n", + "first_star_type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e832900c", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q10-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "58ebba21", + "metadata": {}, + "source": [ + "**Question 10.2**: Use the `star_cell` function to find the value of the column `\"Stellar Age [Gyr]\"` of the **second** star in `\"stars_2.csv\"`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c5b7bd3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "# do not worry if there is no output, the variable is expected to hold the value None\n", + "\n", + "# use the 'star_cell' function to extract the correct value\n", + "second_star_age = star_cell(1, 'Stellar Age [Gyr]', stars_2_rows)\n", + "\n", + "second_star_age" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66c21d45", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q10-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "73b6f8f3", + "metadata": {}, + "source": [ + "**Question 10.3**: Use the `star_cell` function to find the value of the column `\"Stellar Mass [Solar mass]\"` of the **third** star in `\"stars_2.csv\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6e075ce", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "# use the 'star_cell' function to extract the correct value\n", + "third_star_mass = star_cell(2, 'Stellar Mass [Solar mass]', stars_2_rows)\n", + "\n", + "third_star_mass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "855ea6c0", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q10-3\")" + ] + }, + { + "cell_type": "markdown", + "id": "3028047c", + "metadata": {}, + "source": [ + "**Question 11**: Create a **dictionary** mapping the `name` of each star in `\"stars_2.csv\"` to its `Star` object.\n", + "\n", + "You **must** use the `star_cell` function to extract data from `stars_2.csv`.\n", + "\n", + "Your output **must** look like this:\n", + "```python\n", + "{'HD 158259': Star(spectral_type='G0', stellar_effective_temperature=5801.89, stellar_radius=1.21, \n", + " stellar_mass=1.08, 
stellar_luminosity=0.212, stellar_surface_gravity=4.25, stellar_age=None),\n", + " 'K2-187': Star(spectral_type=None, stellar_effective_temperature=5438.0, stellar_radius=0.83, \n", + " stellar_mass=0.97, stellar_luminosity=-0.21, stellar_surface_gravity=4.6, stellar_age=None),\n", + " 'WASP-47': Star(spectral_type=None, stellar_effective_temperature=5552.0, stellar_radius=1.14, \n", + " stellar_mass=1.04, stellar_luminosity=0.032, stellar_surface_gravity=4.34, stellar_age=6.5)}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6256a35", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "stars_2_dict = {} # initialize empty dictionary to store all stars\n", + "\n", + "for row_idx in range(len(stars_2_rows)):\n", + " star_name = star_cell(row_idx, 'Name', stars_2_rows)\n", + " spectral_type = ...\n", + " stellar_effective_temperature = ...\n", + " # extract the other columns from 'stars_2_rows'\n", + " \n", + " star = ... # initialize the 'Star' object using the variables defined above\n", + " stars_2_dict[...] 
= star\n", + "\n", + "stars_2_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "751fe1be", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q11\")" + ] + }, + { + "cell_type": "markdown", + "id": "55712f32", + "metadata": {}, + "source": [ + "**Question 12.1**: Create a **dictionary** mapping the `name` of each star in `\"stars_3.csv\"` to its `Star` object.\n", + "\n", + "You **must** use the `star_cell` function to extract data from `stars_3.csv`.\n", + "\n", + "Your output **must** look like this:\n", + "```python\n", + "{'K2-133': Star(spectral_type='M1.5 V', stellar_effective_temperature=3655.0, stellar_radius=0.46, \n", + " stellar_mass=0.46, stellar_luminosity=-1.479, stellar_surface_gravity=4.77, stellar_age=None),\n", + " 'K2-138': Star(spectral_type='G8 V', stellar_effective_temperature=5356.3, stellar_radius=0.86, \n", + " stellar_mass=0.94, stellar_luminosity=-0.287, stellar_surface_gravity=4.54, stellar_age=2.8),\n", + " 'GJ 667 C': Star(spectral_type='M1.5 V', stellar_effective_temperature=3350.0, stellar_radius=None, \n", + " stellar_mass=0.33, stellar_luminosity=-1.863, stellar_surface_gravity=4.69, stellar_age=2.0)}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adb9098c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'stars_3_dict', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4bc9239", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q12-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "bcd64cc9", + "metadata": {}, + "source": [ + "**Question 12.2**: Combine the three **dictionaries** `stars_1_dict`, `stars_2_dict`, and `stars_3_dict` into a single **dictionary** with all the stars in the `small_data` directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "083bf8d1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "stars_dict = ... # initialize an empty dictionary\n", + "stars_dict.update(...) # add stars_1_dict to stars_dict\n", + "# add stars_2_dict and stars_3_dict to stars_dict\n", + "\n", + "stars_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c05baf33", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q12-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "3830125c", + "metadata": {}, + "source": [ + "### Data Structure 2: namedtuple `Planet`\n", + "\n", + "Just as you did with the stars, you will be using named tuples to store the data about the planets in the `planets_1.csv`, `planets_2.csv`, and `planets_3.csv` files. Before you start reading these files however, you **must** create a new `Planet` type (using namedtuple). It **must** have the following attributes:\n", + "\n", + "* `planet_name`,\n", + "* `host_name`,\n", + "* `discovery_method`,\n", + "* `discovery_year`,\n", + "* `controversial_flag`,\n", + "* `orbital_period`,\n", + "* `planet_radius`,\n", + "* `planet_mass`,\n", + "* `semi_major_radius`,\n", + "* `eccentricity`,\n", + "* `equilibrium_temperature`\n", + "* `insolation_flux`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c91d9990", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# define the namedtuple 'Planet' here\n", + "\n", + "planets_attributes = ... 
# initialize the list of attributes\n", + "\n", + "# define the namedtuple 'Planet'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12727d44", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# run this following cell to initialize and test an example Planet object\n", + "# if this cell fails to execute, you have likely not defined the namedtuple 'Planet' correctly\n", + "jupiter = Planet('Jupiter', 'Sun', 'Imaging', 1610, False, 4333.0, 11.209, 317.828, 5.2038, 0.0489, 110, 0.0345)\n", + "\n", + "jupiter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d90d36", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"planet_object\")" + ] + }, + { + "cell_type": "markdown", + "id": "b7e7130d", + "metadata": {}, + "source": [ + "### Segment 3.3: Creating `Planet` objects\n", + "\n", + "We are now ready to read the files in the `small_data` directory and create `Planet` objects. Creating `Planet` objects however, is going to be more difficult than creating `Star` objects, because the data required to create a single `Planet` object is split up into different files.\n", + "\n", + "The `planets_1.csv`, `planets_2.csv`, and `planets_3.csv` files contain all the data required to create `Planet` objects **except** for the `host_name`. The `host_name` for each planet is to be found in the `mapping_1.json`, `mapping_2.json`, and `mapping_3.json` files." + ] + }, + { + "cell_type": "markdown", + "id": "a0c09672", + "metadata": {}, + "source": [ + "First, let us read the data in `planets_1.csv`. Since this is a CSV file, you can use the `process_csv` function from above to read this file. In the cell below, you **must** read the data in `planets_1.csv` and extract the **header** and the non-header **rows** of the file." 
+ ] + }, + { + "cell_type": "markdown", + "id": "dbd2e784", + "metadata": {}, + "source": [ + "**Question 13.1**: Read the contents of `'planets_1.csv'` into a **list** of **lists** using the `process_csv` function, and extract the **header** and the **rows** in the file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ef2ac17", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "planets_1_csv = process_csv(...) # read the data in 'planets_1.csv'\n", + "planets_header = ...\n", + "planets_1_rows = ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "331a296b", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q13-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "acb8b8ac", + "metadata": {}, + "source": [ + "Now, you are ready to read the data in `mapping_1.json`. Since this is a JSON file, you will need a new function to read this file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d9f8b0d", + "metadata": {}, + "outputs": [], + "source": [ + "# this function uses the 'load' function from the json module (already imported in this notebook) to read files\n", + "def read_json(path):\n", + " with open(path, encoding=\"utf-8\") as f:\n", + " return json.load(f)" + ] + }, + { + "cell_type": "markdown", + "id": "16439061", + "metadata": {}, + "source": [ + "**Question 13.2**: Read the contents of `'mapping_1.json'` into a **dictionary** using the `read_json` function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95ebd431", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this for you\n", + "\n", + "mapping_1_json = read_json(os.path.join(\"small_data\", \"mapping_1.json\"))\n", + "\n", + "mapping_1_json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05f1ce5c", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q13-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "aa4b3d64", + "metadata": {}, + "source": [ + "### Segment 3.4: Combining data from CSV and JSON files\n", + "\n", + "We are now ready to combine the data from `planets_1_rows` and `mapping_1_json` to create `Planet` objects. Before we start, it might be useful to create a function similar to `star_cell` for preprocessing the values in the CSV files." + ] + }, + { + "cell_type": "markdown", + "id": "8c90c9a8", + "metadata": {}, + "source": [ + "### Function 2: `planet_cell(row_idx, col_name, planets_rows, header=planets_header)`\n", + "\n", + "Just like the data in `stars_1.csv`, `stars_2.csv`, and `stars_3.csv`, some of the data in `planets_1.csv`, `planets_2.csv`, and `planets_3.csv` is **missing**. So, now would be a good time to create a function that takes in a `row_idx`, a `col_name` and a **list** of **lists** `planets_rows` (as well as the optional argument `header`) and returns the value of the column `col_name` at the row `row_idx`.\n", + "\n", + "This function **must** typecast the values it returns based on the `col_name`. 
If the value in `planets_rows` is missing (i.e., it is `''`), then the value returned **must** be `None`.\n", + "\n", + "The **column** of `planets_rows` where the value should be obtained from, and the correct **data type** for the value are listed in the table below:\n", + "\n", + "|Column of `planets_rows`|Data Type|\n", + "|------|---------|\n", + "|Planet Name|**string**|\n", + "|Discovery Year|**int**|\n", + "|Discovery Method|**string**|\n", + "|Controversial Flag|**bool**|\n", + "|Orbital Period [days]|**float**|\n", + "|Planet Radius [Earth Radius]|**float**|\n", + "|Planet Mass [Earth Mass]|**float**|\n", + "|Orbit Semi-Major Axis [au]|**float**|\n", + "|Eccentricity|**float**|\n", + "|Equilibrium Temperature [K]|**float**|\n", + "|Insolation Flux [Earth Flux]|**float**|\n", + "\n", + "**Important Warning:** Notice that the `Controversial Flag` column has to be converted into a **bool**. The data is stored in `planets_1.csv` (and consequently in `planets_rows`) as `\"0\"/\"1\"` values (with `\"0\"` representing `False` and `\"1\"` representing `True`). However typecasting **strings** to **bools** is not straightforward. Run the following cell and try to figure out what is happening:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f64fff3", + "metadata": {}, + "outputs": [], + "source": [ + "strings = [\"0\", \"1\", \"\", \" \", \"True\", \"False\"]\n", + "for string in strings:\n", + " print(bool(string))" + ] + }, + { + "cell_type": "markdown", + "id": "7adaed7d", + "metadata": {}, + "source": [ + "If you want to convert the **strings** into **bools**, you will have to explicitly use `if/else` statements to determine whether the value is `\"0\"` or `\"1\"`, as can be seen in the starter code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e78aa1c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... 
with your code\n", + "\n", + "def planet_cell(row_idx, col_name, planets_rows, header=planets_header):\n", + " col_idx = ... # extract col_idx from col_name and header\n", + " val = ... # extract the value at row_idx and col_idx\n", + " if val == '':\n", + " return None\n", + " if col_name in [\"Controversial Flag\"]:\n", + " if val == \"1\":\n", + " return ...\n", + " else:\n", + " return ...\n", + " # for all other columns typecast 'val' and return it depending on col_name" + ] + }, + { + "cell_type": "markdown", + "id": "13800e0a", + "metadata": {}, + "source": [ + "**Question 14.1**: Use the `planet_cell` function to find the value of the column `\"Planet Name\"` of the **first** planet in `\"planets_1.csv\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4e9092d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "first_planet_name = planet_cell(0, 'Planet Name', planets_1_rows)\n", + "\n", + "first_planet_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ee54496", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q14-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "8279bd8e", + "metadata": {}, + "source": [ + "**Question 14.2**: Use the `planet_cell` function to find the value of the column `\"Insolation Flux [Earth Flux]\"` of the **first** planet in `\"planets_1.csv\"`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b9933b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "# do not worry if there is no output, the variable is expected to hold the value None\n", + "\n", + "first_planet_flux = planet_cell(0, 'Insolation Flux [Earth Flux]', planets_1_rows)\n", + "\n", + "first_planet_flux" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c160864e", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q14-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "26c2b56c", + "metadata": {}, + "source": [ + "**Question 14.3**: Use the `planet_cell` function to find the value of the column `\"Controversial Flag\"` of the **second** planet in `\"planets_1.csv\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9a372e2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'second_planet_controversy', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a5ef2a0", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q14-3\")" + ] + }, + { + "cell_type": "markdown", + "id": "ce99a6e1", + "metadata": {}, + "source": [ + "**Question 15**: Create a `Planet` object for the **first** planet in `\"planets_1.csv\"`.\n", + "\n", + "The **attribute** of the `Planet` namedtuple object, the corresponding **column** of the `planets_1.csv` file where the value should be obtained from, and the correct **data type** for the value are listed in the table below:\n", + "\n", + "|Attribute of `Planet` object|Column of `planets_1.csv`|Data Type|\n", + "|---------|------|---------|\n", + "|`planet_name`|Planet Name|**string**|\n", + "|`host_name`| - |**string**|\n", + "|`discovery_method`|Discovery
Method|**string**|\n", + "|`discovery_year`|Discovery Year|**int**|\n", + "|`controversial_flag`|Controversial Flag|**bool**|\n", + "|`orbital_period`|Orbital Period [days]|**float**|\n", + "|`planet_radius`|Planet Radius [Earth Radius]|**float**|\n", + "|`planet_mass`|Planet Mass [Earth Mass]|**float**|\n", + "|`semi_major_radius`|Orbit Semi-Major Axis [au]|**float**|\n", + "|`eccentricity`|Eccentricity|**float**|\n", + "|`equilibrium_temperature`|Equilibrium Temperature [K]|**float**|\n", + "|`insolation_flux`|Insolation Flux [Earth Flux]|**float**|\n", + "\n", + "\n", + "The value of the `host_name` attribute is found in `mapping_1.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2632c39", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "planets_header" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0dc17264", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "row_idx = 0 # the index of the planet we want to convert into a Planet object\n", + "\n", + "# extract the values from planets_1_rows\n", + "planet_name = planet_cell(row_idx, 'Planet Name', planets_1_rows)\n", + "host_name = mapping_1_json[planet_name]\n", + "discovery_method = planet_cell(row_idx, 'Discovery Method', planets_1_rows)\n", + "discovery_year = ...\n", + "controversial_flag = ...\n", + "orbital_period = ...\n", + "planet_radius = ...\n", + "planet_mass = ...\n", + "semi_major_radius = ...\n", + "eccentricity = ...\n", + "equilibrium_temperature = ...\n", + "insolation_flux = ...\n", + "\n", + "# initialize 'first_planet'\n", + "first_planet = Planet(planet_name, host_name, discovery_method, discovery_year,\\\n", + " controversial_flag, orbital_period, planet_radius, planet_mass,\\\n", + " semi_major_radius, eccentricity, equilibrium_temperature, insolation_flux)\n", + "\n", + "first_planet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "43275a5a", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q15\")" + ] + }, + { + "cell_type": "markdown", + "id": "6b01aa7f", + "metadata": {}, + "source": [ + "**Question 16**: Create a **list** of `Planet` objects of each planet in `\"planets_1.csv\"`.\n", + "\n", + "Your output **must** look like this:\n", + "```python\n", + "[Planet(planet_name='55 Cnc b', host_name='55 Cnc', discovery_method='Radial Velocity', \n", + " discovery_year=1996, controversial_flag=False, orbital_period=14.6516, \n", + " planet_radius=13.9, planet_mass=263.9785, semi_major_radius=0.1134, eccentricity=0.0,\n", + " equilibrium_temperature=700.0, insolation_flux=None),\n", + " Planet(planet_name='55 Cnc c', host_name='55 Cnc', discovery_method='Radial Velocity', \n", + " discovery_year=2004, controversial_flag=False, orbital_period=44.3989, \n", + " planet_radius=8.51, planet_mass=54.4738, semi_major_radius=0.2373, eccentricity=0.03, \n", + " equilibrium_temperature=None, insolation_flux=None),\n", + " Planet(planet_name='DMPP-1 b', host_name='DMPP-1', discovery_method='Radial Velocity', \n", + " discovery_year=2019, controversial_flag=False, orbital_period=18.57, \n", + " planet_radius=5.29, planet_mass=24.27, semi_major_radius=0.1462, eccentricity=0.083, \n", + " equilibrium_temperature=877.0, insolation_flux=None),\n", + " Planet(planet_name='GJ 876 b', host_name='GJ 876', discovery_method='Radial Velocity', \n", + " discovery_year=1998, controversial_flag=False, orbital_period=61.1166, \n", + " planet_radius=13.3, planet_mass=723.2235, semi_major_radius=0.208317, eccentricity=0.0324,\n", + " equilibrium_temperature=None, insolation_flux=None),\n", + " Planet(planet_name='GJ 876 c', host_name='GJ 876', discovery_method='Radial Velocity', \n", + " discovery_year=2000, controversial_flag=False, orbital_period=30.0881, \n", + " planet_radius=14.0, planet_mass=226.9846, semi_major_radius=0.12959, 
eccentricity=0.25591, \n", + " equilibrium_temperature=None, insolation_flux=None)]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c17389b0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'planets_1_list', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d76f9821", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q16\")" + ] + }, + { + "cell_type": "markdown", + "id": "5090b70b", + "metadata": {}, + "source": [ + "**Question 17.1**: What is the **fifth** `Planet` object in `'planets_1.csv'`?\n", + "\n", + "You **must** access from the `planets_1_list` to answer this question." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3feb36c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'fifth_planet', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af5c2a76", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q17-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "9dabec60", + "metadata": {}, + "source": [ + "**Question 17.2**: What is the `planet_name` of the **fifth** `Planet` in `'planets_1.csv'`?\n", + "\n", + "You **must** access from the `planets_1_list` to answer this question." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2cb1d3c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'fifth_planet_name', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e798e2c1", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q17-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "f0ef57a0", + "metadata": {}, + "source": [ + "**Question 17.3**: What is the `controversial_flag` of the **fourth** `Planet` in `'planets_1.csv'`?\n", + "\n", + "You **must** access from the `planets_1_list` to answer this question." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "322a8419", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'fourth_planet_controversy', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0864d6fe", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q17-3\")" + ] + }, + { + "cell_type": "markdown", + "id": "9bcb57e6", + "metadata": {}, + "source": [ + "### Segment 3.5: Data Cleaning - broken CSV rows\n", + "\n", + "The code you have written worked well for reading the data in `planets_1.csv` and `mapping_1.json`. However, it will likely **not** work for `planets_2.csv` and `mapping_2.json`. This is because the file `planets_2.csv` is **broken**. For some reason, a few rows in `planets_2.csv` have their data jumbled up. 
This is what `planets_2.csv` looks like:\n", + "\n", + "|Planet Name|Discovery Method|Discovery Year|Controversial Flag|Orbital Period [days]|Planet Radius [Earth Radius]|Planet Mass [Earth Mass]|Orbit Semi-Major Axis [au]|Eccentricity|Equilibrium Temperature [K]|Insolation Flux [Earth Flux]|\n", + "|-----------|----------------|--------------|------------------|---------------------|----------------------------|------------------------|--------------------------|------------|---------------------------|----------------------------|\n", + "|HD 158259 b|Radial Velocity|2020|0|2.17800000|1.292|2.22000|||1478|794.22|\n", + "|K2-187 b|Transit|2018|0|0.77401000|1.200|1.87000|0.016400||1815||\n", + "|K2-187 c|Transit|2018|0|2.87151200|1.400|2.54000|0.039200||1173||\n", + "|K2-187 d|K2-187|Transit|2018|0|7.14958400|2.400|6.35000|0.072000||865|\n", + "|WASP-47 b|2012|Transit|0|4.15914920|12.640|363.60000|0.052000|0.002800|1275|534.00|\n", + "\n", + "We can see that for some reason, in the **fourth** row, the value under the column `Discovery Method` is the name of the planet's host star. This is causing all the other columns in the row to also take meaningless values.\n", + "\n", + "Similarly, in the **fifth** row, we see that the values under the columns `Discovery Method` and `Discovery Year` are swapped.\n", + "\n", + "We will call such a **row** in a CSV file where the values under a column do not match the expected format to be a **broken row**. While it is possible to sometimes extract useful data from broken rows, in this lab and in P10, we will simply **skip** broken rows.\n", + "\n", + "In order to **skip** broken rows, you should first know how to recognize a **broken row**. In general, there is no general rule that helps you identify when a row is broken. This is because CSV rows can be **broken** in all sorts of different ways. Thankfully, we don't have to write code to catch all sorts of weird cases. 
It will suffice for us to manually **inspect** the file `planets_2.csv`, and identify **how** the rows are broken.\n", + "\n", + "The simplest way to recognize a broken row is to check whether you run into any **runtime errors** when you execute your code. So, one simple way to skip bad rows would be to use `try/except` blocks to avoid processing any rows that cause the code to crash.\n", + "\n", + "**Important Note:** In this dataset, as you might have already noticed, it would be **significantly harder** to detect **broken rows** where some of the numerical values are swapped (for example, `Planet Radius [Earth Radius]` and `Planet Mass [Earth Mass]`). You may **assume** that the numerical values are **not** swapped in **any** row, and that **only the rows** in which the **data types** are not as expected are **broken**." + ] + }, + { + "cell_type": "markdown", + "id": "141bc8f7", + "metadata": {}, + "source": [ + "**Question 18**: Create a **list** of `Planet` objects of each planet in `\"planets_2.csv\"`.\n", + "\n", + "You **must** skip any broken rows in the CSV file. 
Your output **must** look like this:\n", + "```python\n", + "[Planet(planet_name='HD 158259 b', host_name='HD 158259', discovery_method='Radial Velocity', \n", + " discovery_year=2020, controversial_flag=False, orbital_period=2.178, \n", + " planet_radius=1.292, planet_mass=2.22, semi_major_radius=None, eccentricity=None, \n", + " equilibrium_temperature=1478.0, insolation_flux=794.22),\n", + " Planet(planet_name='K2-187 b', host_name='K2-187', discovery_method='Transit', \n", + " discovery_year=2018, controversial_flag=False, orbital_period=0.77401, \n", + " planet_radius=1.2, planet_mass=1.87, semi_major_radius=0.0164, eccentricity=None, \n", + " equilibrium_temperature=1815.0, insolation_flux=None),\n", + " Planet(planet_name='K2-187 c', host_name='K2-187', discovery_method='Transit', \n", + " discovery_year=2018, controversial_flag=False, orbital_period=2.871512, \n", + " planet_radius=1.4, planet_mass=2.54, semi_major_radius=0.0392, eccentricity=None, \n", + " equilibrium_temperature=1173.0, insolation_flux=None)]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca37d017", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# replace the ... with your code\n", + "\n", + "planets_2_data = ... # read planets_2.csv\n", + "planets_2_rows = ... # extract the rows from planets_2_data\n", + "mapping_2_json = ... 
# read mapping_2.json\n", + "\n", + "planets_2_list = []\n", + "for row_idx in range(len(planets_2_rows)):\n", + " try:\n", + " pass # replace with your code\n", + " # create a Planet object and append to 'planets_2_list'\n", + " except ValueError:\n", + " continue\n", + "\n", + "planets_2_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9180c21", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q18\")" + ] + }, + { + "cell_type": "markdown", + "id": "934f06f7", + "metadata": {}, + "source": [ + "**Important Warning:** It is considered a bad coding practice to use *bare* `try/except` blocks. This means that you should **never** write code like this:\n", + "\n", + "```python\n", + "try:\n", + " # some code\n", + "except:\n", + " # some other code\n", + "```\n", + "\n", + "If you use *bare* `try/except` blocks, your code will seemingly work even if there are bugs in there, and it can get very hard to debug. You should always **explicitly** catch for specific errors like this:\n", + "\n", + "```python\n", + "try:\n", + " # some code\n", + "except ValueError:\n", + " # some other code\n", + "except IndexError:\n", + " # some other code\n", + "```\n", + "\n", + "This way, your code will still crash if there is some other unexpected bug in your code that needs to be fixed, and will only go to the `except` block if it runs into a `ValueError` or an `IndexError`. The starter code above already catches specifically for `ValueError`. You **must** continue this practice in P10 as well." + ] + }, + { + "cell_type": "markdown", + "id": "72205ef0", + "metadata": {}, + "source": [ + "### Segment 3.6: Data Cleaning - broken JSON files\n", + "\n", + "So far, we have written code that can read `planets_1.csv` and `mapping_1.json`, as well as `planets_2.csv` and `mapping_2.json`. However, if you try to read `mapping_3.json`, you are likely to run into some issues. 
This is because the file `mapping_3.json` is **broken**. Unlike **broken** CSV files, where we only had to skip the **broken rows**, it is much harder to parse **broken JSON files**. When a JSON file is **broken**, we often have no choice but to **skip the file entirely**.\n", + "\n", + "It is also not easy to detect if a JSON file is **broken** using `if` statements. The easiest way is to simply try to read the file using the `read_json` function and check if the code crashes." + ] + }, + { + "cell_type": "markdown", + "id": "e8f24551", + "metadata": {}, + "source": [ + "**Question 19**: Determine if the `'mapping_3.json'` file is **broken** using a `try/except` block." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9caede3a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we have done this one for you\n", + "\n", + "try:\n", + " mapping_3_json = read_json(os.path.join(\"small_data\", \"mapping_3.json\"))\n", + "except json.JSONDecodeError:\n", + " mapping_3_json = {}\n", + " \n", + "mapping_3_json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "956f9c83", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q19\")" + ] + }, + { + "cell_type": "markdown", + "id": "f9183605", + "metadata": {}, + "source": [ + "In the above cell, note that in the `try/except` block, we specifically checked for the `json.JSONDecodeError`. This is the error that is raised when you try to call `json.load` on a **broken** JSON file." + ] + }, + { + "cell_type": "markdown", + "id": "5cacfca1", + "metadata": {}, + "source": [ + "## Segment 4: Data Analysis\n", + "\n", + "We have now managed to read all the data in the `small_data` directory. Now is the time to test if our data structures work!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "b81e376e", + "metadata": {}, + "source": [ + "**Question 20.1**: What is the `host_name` of the **second** planet in `'planets_2.csv'`?\n", + "\n", + "You **must** skip any broken rows. So, you can directly access from the list `planets_2_list` to answer this question." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38e47fd7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'second_planet_host', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7b544c9", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q20-1\")" + ] + }, + { + "cell_type": "markdown", + "id": "47d68311", + "metadata": {}, + "source": [ + "**Question 20.2**: What is the `Star` object of the **third** planet in `'planets_2.csv'`?\n", + "\n", + "You **must** skip any broken rows. So, you can directly access from the list `planets_2_list` to answer this question.\n", + "\n", + "**Hint:** You can use the `stars_dict` **dictionary** defined in q12.2 to find the `Star` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3631118", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'third_planet_star', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dac9696d", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q20-2\")" + ] + }, + { + "cell_type": "markdown", + "id": "40e07b1f", + "metadata": {}, + "source": [ + "**Question 20.3**: What is the `stellar_radius` of the star around which the **first** planet in `'planets_1.csv'` orbits?\n", + "\n", + "You can directly access from the list `planets_1_list` to answer this question." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "794a5bb1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# compute and store the answer in the variable 'first_planet_star_radius', then display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "578652d9", + "metadata": { + "deletable": false, + "editable": false + }, + "outputs": [], + "source": [ + "grader.check(\"q20-3\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "otter": { + "OK_FORMAT": true, + "tests": { + "planet_object": { + "name": "planet_object", + "points": 0, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"qplanet_object\", jupiter)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q1-1": { + "name": "q1-1", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q1-1\", all_files)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q1-2": { + "name": "q1-2", + "points": 3, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q1-2\", actual_files)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q10-1": { + "name": "q10-1", + "points": 1, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q10-1\", first_star_type)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + 
"type": "doctest" + } + ] + }, + "q10-2": { + "name": "q10-2", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q10-2\", second_star_age)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q10-3": { + "name": "q10-3", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q10-3\", third_star_mass)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q11": { + "name": "q11", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q11\", stars_2_dict)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q12-1": { + "name": "q12-1", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q12-1\", stars_3_dict)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q12-2": { + "name": "q12-2", + "points": 3, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q12-2\", stars_dict)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q13-1": { + "name": "q13-1", + "points": 3, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q13-1\", planets_1_rows)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q13-2": { + "name": "q13-2", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q13-2\", mapping_1_json)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + 
"q14-1": { + "name": "q14-1", + "points": 1, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q14-1\", first_planet_name)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q14-2": { + "name": "q14-2", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q14-2\", first_planet_flux)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q14-3": { + "name": "q14-3", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q14-3\", second_planet_controversy)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q15": { + "name": "q15", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q15\", first_planet)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q16": { + "name": "q16", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q16\", planets_1_list)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q17-1": { + "name": "q17-1", + "points": 1, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q17-1\", fifth_planet)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q17-2": { + "name": "q17-2", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q17-2\", fifth_planet_name)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q17-3": { + "name": 
"q17-3", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q17-3\", fourth_planet_controversy)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q18": { + "name": "q18", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q18\", planets_2_list)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q19": { + "name": "q19", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q19\", mapping_3_json)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q2": { + "name": "q2", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q2\", files_in_small_data)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q20-1": { + "name": "q20-1", + "points": 1, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q20-1\", second_planet_host)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q20-2": { + "name": "q20-2", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q20-2\", third_planet_star)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q20-3": { + "name": "q20-3", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q20-3\", first_planet_star_radius)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q3-1": { + "name": "q3-1", + 
"points": 1, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q3-1\", stars_1_path)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q3-2": { + "name": "q3-2", + "points": 4, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q3-2\", paths_in_small_data)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q4-1": { + "name": "q4-1", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q4-1\", json_paths)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q4-2": { + "name": "q4-2", + "points": 3, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q4-2\", stars_paths)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q5": { + "name": "q5", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q5\", first_star)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q6": { + "name": "q6", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q6\", second_star)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q7-1": { + "name": "q7-1", + "points": 1, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q7-1\", second_star_spectral_type)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q7-2": { + "name": "q7-2", + "points": 1, + "suites": [ + { + "cases": [ + { + "code": 
">>> practice_test.check(\"q7-2\", first_star_stellar_age)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q7-3": { + "name": "q7-3", + "points": 3, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q7-3\", stellar_radius_ratio)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q8": { + "name": "q8", + "points": 5, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q8\", stars_1_dict)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q9-1": { + "name": "q9-1", + "points": 2, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q9-1\", gj_876)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "q9-2": { + "name": "q9-2", + "points": 3, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"q9-2\", gj_876_luminosity)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + }, + "star_object": { + "name": "star_object", + "points": 0, + "suites": [ + { + "cases": [ + { + "code": ">>> practice_test.check(\"qstar_object\", sun)\nTrue", + "hidden": false, + "locked": false + } + ], + "scored": true, + "setup": "", + "teardown": "", + "type": "doctest" + } + ] + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lab-p10/practice_test.py b/lab-p10/practice_test.py new file mode 100644 index 0000000..60526c4 --- /dev/null +++ b/lab-p10/practice_test.py @@ -0,0 +1,513 @@ +#!/usr/bin/python +import os, json, math +from collections import namedtuple + +MAX_FILE_SIZE = 300 # units - KB +REL_TOL = 6e-04 # relative tolerance for floats +ABS_TOL = 
15e-03 # absolute tolerance for floats
# NOTE(review): the literal above is the tail of an assignment that begins
# before this chunk (presumably ``ABS_TOL = 15e-03``) -- confirm against the
# preceding lines. It is kept verbatim.

PASS = "PASS"  # message returned by every comparator when the answer is accepted

TEXT_FORMAT = "text"  # question type when expected answer is a str, int, float, or bool
TEXT_FORMAT_NAMEDTUPLE = "text namedtuple"  # question type when expected answer is a namedtuple
TEXT_FORMAT_UNORDERED_LIST = "text list_unordered"  # question type when the expected answer is a list where the order does *not* matter
TEXT_FORMAT_ORDERED_LIST = "text list_ordered"  # question type when the expected answer is a list where the order does matter
TEXT_FORMAT_SPECIAL_ORDERED_LIST = "text list_special_ordered"  # question type when the expected answer is a list where order does matter, but with possible ties. Elements are ordered according to values in special_ordered_json (with ties allowed)
TEXT_FORMAT_DICT = "text dict"  # question type when the expected answer is a dictionary


def return_expected_json():
    """Build the answer key used by ``check_cell``.

    Returns:
        dict mapping a question id (e.g. ``"7-1"``) to a 2-tuple
        ``(question_type, expected_value)`` where ``question_type`` is one of
        the ``TEXT_FORMAT*`` constants.

    ``Star`` and ``Planet`` are resolved when this function is called, so it
    is fine that they are defined further down in the module.
    """
    def data_path(name):
        # Expected paths are built with os.path.join, exactly like the answers.
        return os.path.join("small_data", name)

    data_files = ['mapping_1.json',
                  'mapping_2.json',
                  'mapping_3.json',
                  'planets_1.csv',
                  'planets_2.csv',
                  'planets_3.csv',
                  'stars_1.csv',
                  'stars_2.csv',
                  'stars_3.csv']
    # Reverse alphabetical order: stars_3.csv ... mapping_1.json
    files_descending = data_files[::-1]

    # Star records (namedtuples are immutable, so sharing them between
    # several entries of the key is safe).
    star_55_cnc = Star(spectral_type='G8V', stellar_effective_temperature=5172.0,
                       stellar_radius=0.94, stellar_mass=0.91, stellar_luminosity=-0.197,
                       stellar_surface_gravity=4.43, stellar_age=10.2)
    star_dmpp_1 = Star(spectral_type='F8 V', stellar_effective_temperature=6196.0,
                       stellar_radius=1.26, stellar_mass=1.21, stellar_luminosity=0.32,
                       stellar_surface_gravity=4.41, stellar_age=2.01)
    star_gj_876 = Star(spectral_type='M2.5V', stellar_effective_temperature=3271.0,
                       stellar_radius=0.3, stellar_mass=0.32, stellar_luminosity=-1.907,
                       stellar_surface_gravity=4.87, stellar_age=1.0)
    star_hd_158259 = Star(spectral_type='G0', stellar_effective_temperature=5801.89,
                          stellar_radius=1.21, stellar_mass=1.08, stellar_luminosity=0.212,
                          stellar_surface_gravity=4.25, stellar_age=None)
    star_k2_187 = Star(spectral_type=None, stellar_effective_temperature=5438.0,
                       stellar_radius=0.83, stellar_mass=0.97, stellar_luminosity=-0.21,
                       stellar_surface_gravity=4.6, stellar_age=None)
    star_wasp_47 = Star(spectral_type=None, stellar_effective_temperature=5552.0,
                        stellar_radius=1.14, stellar_mass=1.04, stellar_luminosity=0.032,
                        stellar_surface_gravity=4.34, stellar_age=6.5)
    star_k2_133 = Star(spectral_type='M1.5 V', stellar_effective_temperature=3655.0,
                       stellar_radius=0.46, stellar_mass=0.46, stellar_luminosity=-1.479,
                       stellar_surface_gravity=4.77, stellar_age=None)
    star_k2_138 = Star(spectral_type='G8 V', stellar_effective_temperature=5356.3,
                       stellar_radius=0.86, stellar_mass=0.94, stellar_luminosity=-0.287,
                       stellar_surface_gravity=4.54, stellar_age=2.8)
    star_gj_667_c = Star(spectral_type='M1.5 V', stellar_effective_temperature=3350.0,
                         stellar_radius=None, stellar_mass=0.33, stellar_luminosity=-1.863,
                         stellar_surface_gravity=4.69, stellar_age=2.0)

    stars_1 = {'55 Cnc': star_55_cnc, 'DMPP-1': star_dmpp_1, 'GJ 876': star_gj_876}
    stars_2 = {'HD 158259': star_hd_158259, 'K2-187': star_k2_187, 'WASP-47': star_wasp_47}
    stars_3 = {'K2-133': star_k2_133, 'K2-138': star_k2_138, 'GJ 667 C': star_gj_667_c}

    # Planet records.
    planet_55_cnc_b = Planet(planet_name='55 Cnc b', host_name='55 Cnc', discovery_method='Radial Velocity',
                             discovery_year=1996, controversial_flag=False, orbital_period=14.6516,
                             planet_radius=13.9, planet_mass=263.9785, semi_major_radius=0.1134,
                             eccentricity=0.0, equilibrium_temperature=700.0, insolation_flux=None)
    planet_55_cnc_c = Planet(planet_name='55 Cnc c', host_name='55 Cnc', discovery_method='Radial Velocity',
                             discovery_year=2004, controversial_flag=False, orbital_period=44.3989,
                             planet_radius=8.51, planet_mass=54.4738, semi_major_radius=0.2373,
                             eccentricity=0.03, equilibrium_temperature=None, insolation_flux=None)
    planet_dmpp_1_b = Planet(planet_name='DMPP-1 b', host_name='DMPP-1', discovery_method='Radial Velocity',
                             discovery_year=2019, controversial_flag=False, orbital_period=18.57,
                             planet_radius=5.29, planet_mass=24.27, semi_major_radius=0.1462,
                             eccentricity=0.083, equilibrium_temperature=877.0, insolation_flux=None)
    planet_gj_876_b = Planet(planet_name='GJ 876 b', host_name='GJ 876', discovery_method='Radial Velocity',
                             discovery_year=1998, controversial_flag=False, orbital_period=61.1166,
                             planet_radius=13.3, planet_mass=723.2235, semi_major_radius=0.208317,
                             eccentricity=0.0324, equilibrium_temperature=None, insolation_flux=None)
    planet_gj_876_c = Planet(planet_name='GJ 876 c', host_name='GJ 876', discovery_method='Radial Velocity',
                             discovery_year=2000, controversial_flag=False, orbital_period=30.0881,
                             planet_radius=14.0, planet_mass=226.9846, semi_major_radius=0.12959,
                             eccentricity=0.25591, equilibrium_temperature=None, insolation_flux=None)

    expected_json = {
        "1-1": (TEXT_FORMAT_UNORDERED_LIST, ['.DS_Store', '.ipynb_checkpoints'] + data_files),
        "1-2": (TEXT_FORMAT_UNORDERED_LIST, list(data_files)),
        "2": (TEXT_FORMAT_ORDERED_LIST, list(files_descending)),
        "3-1": (TEXT_FORMAT, data_path("stars_1.csv")),
        "3-2": (TEXT_FORMAT_ORDERED_LIST, [data_path(name) for name in files_descending]),
        "4-1": (TEXT_FORMAT_ORDERED_LIST, [data_path("mapping_3.json"),
                                           data_path("mapping_2.json"),
                                           data_path("mapping_1.json")]),
        "4-2": (TEXT_FORMAT_ORDERED_LIST, [data_path("stars_3.csv"),
                                           data_path("stars_2.csv"),
                                           data_path("stars_1.csv")]),
        "star_object": (TEXT_FORMAT_NAMEDTUPLE,
                        Star(spectral_type='G2 V', stellar_effective_temperature=5780.0,
                             stellar_radius=1.0, stellar_mass=1.0, stellar_luminosity=0.0,
                             stellar_surface_gravity=4.44, stellar_age=4.6)),
        "5": (TEXT_FORMAT_NAMEDTUPLE, star_55_cnc),
        "6": (TEXT_FORMAT_NAMEDTUPLE, star_dmpp_1),
        "7-1": (TEXT_FORMAT, "F8 V"),
        "7-2": (TEXT_FORMAT, 10.2),
        "7-3": (TEXT_FORMAT, 0.7460317460317459),
        "7": (TEXT_FORMAT, 0.016741496598639403),
        "8": (TEXT_FORMAT_DICT, stars_1),
        "9-1": (TEXT_FORMAT_NAMEDTUPLE, star_gj_876),
        "9-2": (TEXT_FORMAT, -1.907),
        "10-1": (TEXT_FORMAT, 'G0'),
        "10-2": (TEXT_FORMAT, None),
        "10-3": (TEXT_FORMAT, 1.04),
        "11": (TEXT_FORMAT_DICT, stars_2),
        "12-1": (TEXT_FORMAT_DICT, stars_3),
        # Fresh dict; key order is stars_1, then stars_2, then stars_3.
        "12-2": (TEXT_FORMAT_DICT, {**stars_1, **stars_2, **stars_3}),
        "planet_object": (TEXT_FORMAT_NAMEDTUPLE,
                          Planet(planet_name='Jupiter', host_name='Sun', discovery_method='Imaging',
                                 discovery_year=1610, controversial_flag=False, orbital_period=4333.0,
                                 planet_radius=11.209, planet_mass=317.828, semi_major_radius=5.2038,
                                 eccentricity=0.0489, equilibrium_temperature=110, insolation_flux=0.0345)),
        # Raw csv rows: every field is still a string at this point.
        "13-1": (TEXT_FORMAT_ORDERED_LIST, [
            ['55 Cnc b', 'Radial Velocity', '1996', '0', '14.65160000', '13.900',
             '263.97850', '0.113400', '0.000000', '700', ''],
            ['55 Cnc c', 'Radial Velocity', '2004', '0', '44.39890000', '8.510',
             '54.47380', '0.237300', '0.030000', '', ''],
            ['DMPP-1 b', 'Radial Velocity', '2019', '0', '18.57000000', '5.290',
             '24.27000', '0.146200', '0.083000', '877', ''],
            ['GJ 876 b', 'Radial Velocity', '1998', '0', '61.11660000', '13.300',
             '723.22350', '0.208317', '0.032400', '', ''],
            ['GJ 876 c', 'Radial Velocity', '2000', '0', '30.08810000', '14.000',
             '226.98460', '0.129590', '0.255910', '', ''],
        ]),
        "13-2": (TEXT_FORMAT_DICT, {'55 Cnc b': '55 Cnc',
                                    '55 Cnc c': '55 Cnc',
                                    'DMPP-1 b': 'DMPP-1',
                                    'GJ 876 b': 'GJ 876',
                                    'GJ 876 c': 'GJ 876'}),
        "14-1": (TEXT_FORMAT, '55 Cnc b'),
        "14-2": (TEXT_FORMAT, None),
        "14-3": (TEXT_FORMAT, False),
        "15": (TEXT_FORMAT_NAMEDTUPLE, planet_55_cnc_b),
        "16": (TEXT_FORMAT_ORDERED_LIST, [planet_55_cnc_b,
                                          planet_55_cnc_c,
                                          planet_dmpp_1_b,
                                          planet_gj_876_b,
                                          planet_gj_876_c]),
        "17-1": (TEXT_FORMAT_NAMEDTUPLE, planet_gj_876_c),
        "17-2": (TEXT_FORMAT, 'GJ 876 c'),
        "17-3": (TEXT_FORMAT, False),
        "18": (TEXT_FORMAT_ORDERED_LIST, [
            Planet(planet_name='HD 158259 b', host_name='HD 158259',
                   discovery_method='Radial Velocity', discovery_year=2020,
                   controversial_flag=False, orbital_period=2.178, planet_radius=1.292,
                   planet_mass=2.22, semi_major_radius=None, eccentricity=None,
                   equilibrium_temperature=1478.0, insolation_flux=794.22),
            Planet(planet_name='K2-187 b', host_name='K2-187', discovery_method='Transit',
                   discovery_year=2018, controversial_flag=False, orbital_period=0.77401,
                   planet_radius=1.2, planet_mass=1.87, semi_major_radius=0.0164,
                   eccentricity=None, equilibrium_temperature=1815.0, insolation_flux=None),
            Planet(planet_name='K2-187 c', host_name='K2-187', discovery_method='Transit',
                   discovery_year=2018, controversial_flag=False, orbital_period=2.871512,
                   planet_radius=1.4, planet_mass=2.54, semi_major_radius=0.0392,
                   eccentricity=None, equilibrium_temperature=1173.0, insolation_flux=None),
        ]),
        "19": (TEXT_FORMAT_DICT, {}),
        "20-1": (TEXT_FORMAT, 'K2-187'),
        "20-2": (TEXT_FORMAT_NAMEDTUPLE, star_k2_187),
        "20-3": (TEXT_FORMAT, 0.94),
    }
    return expected_json


def check_cell(qnum, actual):
    """Compare one notebook answer against the answer key.

    Args:
        qnum: question label; its first character is dropped before the
            lookup (e.g. ``"q7-1"`` is looked up as ``"7-1"``).
        actual: the value produced by the student's cell.

    Returns:
        ``PASS`` when the answer is accepted, otherwise an error message.

    Raises:
        KeyError: if the label does not name a question in the key.
    """
    expected_json = return_expected_json()
    question_format, expected = expected_json[qnum[1:]]
    comparators = {
        TEXT_FORMAT: simple_compare,
        TEXT_FORMAT_UNORDERED_LIST: list_compare_unordered,
        TEXT_FORMAT_ORDERED_LIST: list_compare_ordered,
        TEXT_FORMAT_DICT: dict_compare,
        TEXT_FORMAT_NAMEDTUPLE: namedtuple_compare,
    }
    try:
        compare = comparators.get(question_format)
        if compare is not None:
            return compare(expected, actual)
        # Question types without a dedicated comparator fall back to ==.
        if expected != actual:
            return "expected %s but found %s " % (repr(expected), repr(actual))
    except Exception:
        # Best effort: a comparator that chokes on an odd answer must not
        # crash the notebook; fall back to a plain equality check.
        if expected != actual:
            return "expected %s" % (repr(expected))
    return PASS


def simple_compare(expected, actual, complete_msg=True):
    """Compare two scalar-like values.

    Exact ``type()`` checks are used on purpose (not ``isinstance``) so that
    a ``bool`` is never accepted where an ``int`` is expected; ``int`` and
    ``float`` are the only types allowed to stand in for each other.  When
    ``expected`` is a float the comparison uses ``math.isclose`` with the
    module-level tolerances.

    Args:
        expected: reference value (may also be a type object).
        actual: value to check.
        complete_msg: when False, a value mismatch message omits the
            " but found ..." suffix (type-mismatch messages are unaffected).

    Returns:
        ``PASS`` or an error message.
    """
    msg = PASS
    if type(expected) is type:
        if expected != actual:
            if type(actual) is type:
                msg = "expected %s but found %s" % (expected.__name__, actual.__name__)
            else:
                msg = "expected %s but found %s" % (expected.__name__, repr(actual))
    elif type(expected) != type(actual) and not (type(expected) in [float, int] and type(actual) in [float, int]):
        msg = "expected to find type %s but found type %s" % (type(expected).__name__, type(actual).__name__)
    elif type(expected) is float:
        if not math.isclose(actual, expected, rel_tol=REL_TOL, abs_tol=ABS_TOL):
            msg = "expected %s" % (repr(expected))
            if complete_msg:
                msg = msg + " but found %s" % (repr(actual))
    else:
        if expected != actual:
            msg = "expected %s" % (repr(expected))
            if complete_msg:
                msg = msg + " but found %s" % (repr(actual))
    return msg


# Names of the namedtuple types the comparators know how to descend into.
namedtuples = ['Star', 'Planet']
star_attributes = ['spectral_type',
                   'stellar_effective_temperature',
                   'stellar_radius',
                   'stellar_mass',
                   'stellar_luminosity',
                   'stellar_surface_gravity',
                   'stellar_age']
# Create a namedtuple type, Star
Star = namedtuple("Star", star_attributes)
planets_attributes = ['planet_name',
                      'host_name',
                      'discovery_method',
                      'discovery_year',
                      'controversial_flag',
                      'orbital_period',
                      'planet_radius',
                      'planet_mass',
                      'semi_major_radius',
                      'eccentricity',
                      'equilibrium_temperature',
                      'insolation_flux']
# Create a namedtuple type, Planet
Planet = namedtuple("Planet", planets_attributes)


def namedtuple_compare(expected, actual):
    """Compare two namedtuples by type name, field names, then field values.

    Types are matched by *name* because the student's notebook defines its
    own ``Star``/``Planet`` classes, which are distinct objects from the ones
    in this module.

    Returns:
        ``PASS`` or a message describing the first difference found.
    """
    try:
        actual_fields = actual._fields
    except AttributeError:
        return "expected namedtuple but found %s" % (type(actual).__name__)
    if type(expected).__name__ != type(actual).__name__:
        return "expected namedtuple %s but found namedtuple %s" % (type(expected).__name__, type(actual).__name__)
    expected_fields = expected._fields
    msg = list_compare_ordered(list(expected_fields), list(actual_fields), "namedtuple attributes")
    if msg != PASS:
        return msg
    for field in expected_fields:
        val = simple_compare(getattr(expected, field), getattr(actual, field))
        if val != PASS:
            return "at attribute %s of namedtuple %s, " % (field, type(expected).__name__) + val
    return msg


def list_compare_ordered(expected, actual, obj="list"):
    """Compare two lists element by element, in order.

    Nested lists, dicts and the known namedtuples are compared recursively;
    every other element (including ``None``) goes through ``simple_compare``.

    Args:
        expected: reference list.
        actual: list to check.
        obj: noun used for the container in error messages.

    Returns:
        ``PASS`` or a message describing the first difference found.
    """
    msg = PASS
    if type(expected) != type(actual):
        msg = "expected to find type %s but found type %s" % (type(expected).__name__, type(actual).__name__)
        return msg
    for i, expected_item in enumerate(expected):
        if i >= len(actual):
            msg = "expected missing %s in %s" % (repr(expected_item), obj)
            break
        item_type = type(expected_item)
        if item_type is list:
            val = list_compare_ordered(expected_item, actual[i], "sub" + obj)
        elif item_type is dict:
            val = dict_compare(expected_item, actual[i])
        elif item_type.__name__ in namedtuples:
            val = namedtuple_compare(expected_item, actual[i])
        else:
            # Scalars, None and any other type: never leave `val` unset or
            # carried over from the previous element.
            val = simple_compare(expected_item, actual[i])
        if val != PASS:
            msg = "at index %d of the %s, " % (i, obj) + val
            break
    if len(actual) > len(expected) and msg == PASS:
        msg = "found unexpected %s in %s" % (repr(actual[len(expected)]), obj)
    if len(expected) != len(actual):
        msg = msg + " (found %d entries in %s, but expected %d)" % (len(actual), obj, len(expected))

    # For flat lists of scalars, hint when the contents are right but the
    # order is not.
    if len(expected) > 0 and type(expected[0]) in [int, float, bool, str]:
        if msg != PASS and list_compare_unordered(expected, actual, obj) == PASS:
            msg = msg + " (%s may not be ordered as required)" % (obj)
    return msg


def list_compare_helper(larger, smaller):
    """Find the first element of ``larger`` that has no match in ``smaller``.

    Both lists are expected to be sorted; a single cursor ``j`` advances
    through ``smaller`` and is never rewound.

    Returns:
        ``PASS`` if every element of ``larger`` was matched, otherwise the
        ``simple_compare`` message for the first unmatched element
        ("expected <repr>").  Callers prefix it with "missing " or
        "found un" to build the final wording.
    """
    msg = PASS
    j = 0
    for i in range(len(larger)):
        if i == len(smaller):
            msg = "expected %s" % (repr(larger[i]))
            break
        found = False
        while not found:
            if j == len(smaller):
                # Cursor ran off the end: report against the last element.
                val = simple_compare(larger[i], smaller[j - 1], False)
                break
            val = simple_compare(larger[i], smaller[j], False)
            j += 1
            if val == PASS:
                found = True
                break
        if not found:
            msg = val
            break
    return msg


def list_compare_unordered(expected, actual, obj="list"):
    """Compare two lists as multisets (order is ignored).

    Args:
        expected: reference list.
        actual: list to check.
        obj: noun used for the container in error messages.

    Returns:
        ``PASS`` or a message naming a missing and/or unexpected entry.
    """
    msg = PASS
    if type(expected) != type(actual):
        msg = "expected to find type %s but found type %s" % (type(expected).__name__, type(actual).__name__)
        return msg
    try:
        sort_expected = sorted(expected)
        sort_actual = sorted(actual)
    except Exception:
        # Entries that cannot be ordered against each other (mixed types).
        msg = "unexpected datatype found in %s" % (obj)
        if len(expected) > 0:
            msg = msg + "; expected entries of type %s" % (type(expected[0]).__name__)
        return msg

    size_note = " (found %d entries in %s, but expected %d)" % (len(actual), obj, len(expected))
    if len(expected) == 0 or len(actual) == 0:
        if len(expected) > 0:
            msg = "in the %s, missing %s" % (obj, repr(expected[0])) + size_note
        elif len(actual) > 0:
            # An empty expected list must not accept a non-empty answer.
            msg = "in the %s, found unexpected %s" % (obj, repr(sort_actual[0])) + size_note
        return msg

    val = simple_compare(sort_expected[0], sort_actual[0])
    if val.startswith("expected to find type"):
        return "in the %s, " % (obj) + val
    if len(expected) > len(actual):
        return "in the %s, missing " % (obj) + list_compare_helper(sort_expected, sort_actual) + size_note
    if len(expected) < len(actual):
        return "in the %s, found un" % (obj) + list_compare_helper(sort_actual, sort_expected) + size_note
    val = list_compare_helper(sort_expected, sort_actual)
    if val != PASS:
        msg = "in the %s, missing " % (obj) + val + ", but found un" + list_compare_helper(sort_actual,
                                                                                        sort_expected)
    return msg


def list_compare_special_init(expected, special_order):
    """Group ``expected`` into runs of consecutive items that tie.

    ``special_order[i]`` is the ranking value of ``expected[i]``; adjacent
    items with equal ranking values land in the same sub-list.

    Returns:
        list of lists covering ``expected`` in its original order.
    """
    groups = []
    for i, item in enumerate(expected):
        if not groups or special_order[i - 1] != special_order[i]:
            groups.append([])
        groups[-1].append(item)
    return groups


def list_compare_special(expected, actual, special_order):
    """Compare ordered lists where tied items may appear in any order.

    First checks that both lists hold the same items, then checks each run
    of tied items (see ``list_compare_special_init``) against the matching
    slice of ``actual`` without regard to order inside the run.

    Returns:
        ``PASS`` or an error message.
    """
    expected = list_compare_special_init(expected, special_order)
    msg = PASS
    expected_list = []
    for expected_item in expected:
        expected_list.extend(expected_item)
    val = list_compare_unordered(expected_list, actual)
    if val != PASS:
        msg = val
    else:
        i = 0
        for expected_item in expected:
            j = len(expected_item)
            actual_item = actual[i: i + j]
            val = list_compare_unordered(expected_item, actual_item)
            if val != PASS:
                if j == 1:
                    msg = "at index %d " % (i) + val
                else:
                    msg = "between indices %d and %d " % (i, i + j - 1) + val
                msg = msg + " (list may not be ordered as required)"
                break
            i += j

    return msg


def dict_compare(expected, actual, obj="dict"):
    """Compare two dicts: first their key sets, then each value.

    Values that are lists, dicts or known namedtuples are compared
    recursively; everything else goes through ``simple_compare``.  The first
    mismatching key (in ``expected``'s iteration order) is reported, which
    matches how ``list_compare_ordered`` reports the first bad index.

    Args:
        expected: reference dict.
        actual: dict to check.
        obj: noun used for the container in error messages.

    Returns:
        ``PASS`` or an error message.
    """
    if type(expected) != type(actual):
        return "expected to find type %s but found type %s" % (type(expected).__name__, type(actual).__name__)
    try:
        expected_keys = sorted(expected.keys())
        actual_keys = sorted(actual.keys())
    except Exception:
        # Keys that cannot be ordered against each other (mixed types).
        msg = "unexpected datatype found in keys of dict"
        if len(expected) > 0:
            msg = msg + "; expect a dict with keys of type %s" % (type(next(iter(expected))).__name__)
        return msg
    val = list_compare_unordered(expected_keys, actual_keys, "dict")
    if val != PASS:
        return "bad keys in %s: " % (obj) + val
    for key in expected:
        expected_value = expected[key]
        value_type = type(expected_value)
        if value_type is list:
            val = list_compare_ordered(expected_value, actual[key], "value")
        elif value_type is dict:
            val = dict_compare(expected_value, actual[key], "sub" + obj)
        elif value_type.__name__ in namedtuples:
            val = namedtuple_compare(expected_value, actual[key])
        else:
            # Scalars, None and any other type: never leave `val` unset or
            # carried over from the previous key.
            val = simple_compare(expected_value, actual[key])
        if val != PASS:
            return "incorrect val for key %s in %s: " % (repr(key), obj) + val
    return PASS


def check(qnum, actual):
    """Grade one answer for display in the notebook.

    Returns:
        ``True`` when the answer passes.  Otherwise an HTML-formatted error
        line is printed and the function returns ``None`` (no explicit
        return), so nothing extra is echoed as the cell's value.
    """
    msg = check_cell(qnum, actual)
    if msg == PASS:
        return True
    print("<b style='color: red;'>ERROR:</b> " + msg)


def check_file_size(path):
    """Assert that the file at ``path`` is small enough to be submitted.

    ``MAX_FILE_SIZE`` is defined outside this chunk; the ``* 10**3`` factor
    and the message suggest it is expressed in KB.

    Raises:
        AssertionError: if the file is too large.
        OSError: from ``os.path.getsize`` if the file cannot be read.
    """
    size = os.path.getsize(path)
    assert size < MAX_FILE_SIZE * 10**3, "Your file is too big to be processed by Gradescope; please delete unnecessary output cells so your file size is < %s KB" % MAX_FILE_SIZE
z`ModP$Ax*~^XyPFTi)rUj#~qlNeXIqCFuj5XFNwA5K~Lxv+XxR)^RB>y69DY-Hn$N zE^jt2laZcXmljenbt~zRSw7lhf|hE8mn;Y;573)(r;Woy@&}@A%o`@OmCf5X6!g(j z6+VZ&oI@q8n;Y)h94Ft+$Y)zk$Hafc)v~4@bc(|7=XK@yswTTRxXR`T4|xCms~b`3 z9$QycxgDwvlgn`roK#@DYCttqBsyx|H>|y<?v`n{o~#<;bN3)>h;RIHZ^^E!a+!IU zV=7Qz9f1_8U|Qz*%`B;+VZ?(JmrhxWSf<tEm%TB&E%pi2y$a&>v`{T^GDZ8vbOTb9 zj~y9Ih$tsT9Nc7hg{;Jx^U0y%^RjCKLyJ8uHHUU6Ex=cHmn0Av$&1MnL?AHkKLiG| z7#PoVu4AJjDsk-Co8KZ>SiWMXmqpxPQjhEG(+_wB4@&l}aob?<?rlPaBUYznMtqJp z<I|V(SRgbgTw2G&E6nTh*nLjojk1OAh^tMF%x-~v`j~^Yj@+aOBX-mZ6*TJUo`nzo z7$ZC?5xIY#(FebBeY2bR*AZ2i`3d#^47H%e!$+hwC!aGzKfWE1!m_Vi)R5a-%SkZR zG8gQwy2NNqGkMyVG^M)cd1gCaxI36<pjuUyI+HlYgI>@XDKAAd6xr@f*FKBW`;--2 z1C#m%<IQ4gwm;-^yn6eIg@XtSQDiBFHXJUO^PpZrk8pD<8A`L&X|cB1YyaG~&{#%r z`4e;M2>-x|t+=%%u6(wzW6>W}VJ;g~t=#(KZWBIkKJTODXqgh~Y-fsoc&f|6QPH4& z%SfnMlv&h{jW&^6^#?K<ESz+Xv>aAw%xD@<ANr-e=WaSqYE78P5&MH}HjgHI#8;w$ z1fn7Ohe!+whz9M4XkeG3k%4z?)I}yt+daD@n6qo)Xq}A{vcL*1Buv;@mAonSA-}vJ zx0k2})wmgnCB}^PlvBu~=$i5<L7Do?HQQvw0*405N{{}!u|ohi_Ea{sFRHhowW+=# z=E-bMN(G|6lzXl+ss7Luf6SE%g~;nWpCTEwsXsh=78vX><Ex#8y`S%K>cgNXV`k|0 z#8k9OIV$_TW+GTa&HZw>L}Q;6s@*uixYMMByI!sRcg@?{CR<*`2HyA-F+GeK8QLn% zrVaLQcv3knA$wXtNH?KKntc|AlF5TK+YhFAa}w<Na-2!$?mKV;qZ~G1;I#Y6#}zXJ zS`I1aP|JqY53L*7F0FXIVeqK?{78sa)}(B_Prw@WwpVSER@^Nu8JpE%ZaKs~^Z9C5 z70>8O9+l&R8<K~&1b@2wqPnm`prqw+ZH=Y9)Jqe1o2El+Y&hPCf{r7_RyFB2=iR#8 z!kwB6<qVSVG>PS%iiWMk<N}c3-)1hH8wFC92M`woFw^Khp<fdhKFu>IUKcnYh<Wkq zHypk8WCd>PHF2NZ!nT3b59q2L{H`kd?i{#vmRMo5(U^J4NX^Sdvn7g<YFjyon-2e# z&}(nKXY}bFBlk^d_S2k}5KIbNp}2NJ`@jj3qW^R4E-S3@v*VQ`3DJ%<)pToY$60di z<nXYGwn@H&w&o_6^_Rb_y@oGhG={e-+@0@<RX^EIsuQt(AALbF6(QtI<;d!u;MYN# z*y>3jJ8uj6v9XnV>im-8lg6d9_|OjMZVS$OnZO{ky=5$Kp4Djan6k8lUtS{olSe;K z<cqYt$ys5VrY}^irwLT_qRspXUnElB^#fkmf9J_k&cJs#1D+rS#~F9V(wocjdQ?<{ zqqWGmmxfJRA1r5)9$E*FudV36!86{`*BZk3qR?Eod#vE4r-4B4wXSDf`Zbjmai-k{ zmvVE7Y9moHZ*cHt&Wj-(v$yV8(TiF&VY|AF{&<GnHh<!#WBts##DKh=J>lK$mzg^z zeMKrR$+yPW%`3&_a?HhgFU<*xyGN?$H>#xVIhE6C`+7|%f3OoZE3V(g$r5IFlK);& 
zBJOvNO^zwc?ZV%OQ772;l}Uafc9f*Sj--M0&WK05beyQC|B6b{Ki68f{Z3lYUEC~m zWpg9~l`|u;UhM^7#Ln-GSURrwG<;CJ1u}6gZZ66AoKN4ycg4+4DV_SJk2;SGUU_05 zM9C(=7CxuVckW3%&dg^#XI4DVxJesAl3~X#G?J)zBFuy!RW@21e`N9`B3w2;et_<I z*yywKQBxGN*0D6Qa{dhSjr)-QhpD~2-HrU4x}Jyg+8Rqni08?#6cHa61n-gEXRUAX z)V|z)A*#SRNA06HZb8u;JM^;09jh`kF}AW&uZwN;q<&X9xDOvovc|+;9CUFuD-Q^8 zwM5%W7G`80RDPse+2{A5fSuT^!rE#mSO9gMsB)GU)z}2f74>r33gtsV{;kJS27Fn( z6qoLxj!We0S5Sy7q;r~_j+%G^WpV2@WI*%(LrX%wp^*Su4H*B>rGZ8I*99+$=o-E% zF$P4^KP`(bi~L(Jw=4o(df6R-$oHKcm`_D3w<!SJZ~WVl&B(>YfcdLiIWWH(#9wag z!2ANqo(s}H;jixFz<haNe|~%a<*p9Q_Z}zdsr-b$x{CwzV*&nmJslX&Iy>Q|2Ewy6 z23Og$x)%ZCWne4#r7Q!+kC#k%{0zUkxPtLT{44mSvkS)S1_bZ?8Gd!00OO4XzN;Ew z_&80d5WI8#$(q#(7YxS>euJ+vhV`4XuN2t#+~HNeuTElMxSP;7_-b1uM<kZ^O!4T_ ZnPUA94hNRISSk?^6yU4x;-4+Ze*sDR2zUSh literal 0 HcmV?d00001 -- GitLab