diff --git a/src/cs598_dlh_final_project.ipynb b/src/cs598_dlh_final_project.ipynb
index 1cb485799937731841e3bbc3a375323dbb0ea1ab..05528cc443d12a80d8952fc616b587b1c2a29593 100644
--- a/src/cs598_dlh_final_project.ipynb
+++ b/src/cs598_dlh_final_project.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 38,
    "id": "5da368cd-3045-4ec0-86fb-2ce15b5d1b92",
    "metadata": {},
    "outputs": [],
@@ -19,7 +19,7 @@
     "import os\n",
     "\n",
     "# Typing includes.\n",
-    "from typing import Dict, List, Optional\n",
+    "from typing import Dict, List, Optional, Any\n",
     "\n",
     "# Numerical includes.\n",
     "import numpy as np\n",
@@ -230,19 +230,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 98,
    "id": "b881e548-4d27-4725-8c47-b2612157929e",
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "IndentationError",
-     "evalue": "expected an indented block (845818540.py, line 69)",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;36m  File \u001b[0;32m\"/var/folders/bw/pyw_1xcj0f302h0krt1_f5lm0000gn/T/ipykernel_27199/845818540.py\"\u001b[0;36m, line \u001b[0;32m69\u001b[0m\n\u001b[0;31m    def parse_d_items(self, patients: Dict[str, Patient]) -> Dict[str, Patient]:\u001b[0m\n\u001b[0m      ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "class MIMIC3DatasetWrapper(MIMIC3Dataset):\n",
     "    ''' Add extra tables to the MIMIC III dataset.\n",
@@ -254,78 +245,170 @@
     "      The text data is generally joined on the PATIENTID, HADMID, ITEMID to match the\n",
     "      pyHealth Vists class representation.\n",
     "    '''\n",
+    "   \n",
+    "    # We need to add storage for text-based lookup tables here.\n",
+    "    def __init__(self, *args, **kwargs):\n",
+    "        self._valid_text_tables = [\"D_ICD_DIAGNOSES\", \"D_ITEMS\", \"D_ICD_PROCEDURES\", \"D_LABITEMS\"]\n",
+    "        self._text_descriptions = {x: {} for x in self._valid_text_tables}\n",
+    "        super().__init__(*args, **kwargs)\n",
     "    \n",
-    "    # Skip init and defer to base class.\n",
+    "    def get_all_tables(self) -> List[str]: \n",
+    "        return list(self._text_descriptions.keys())\n",
+    "        \n",
+    "    def get_text_dict(self, table_name: str) -> Dict[str, Dict[Any, Any]]:\n",
+    "        return self._text_descriptions.get(table_name)\n",
     "    \n",
     "    # Note the name has to match the table name exactly.\n",
     "    # See https://github.com/sunlabuiuc/PyHealth/blob/master/pyhealth/datasets/mimic3.py#L71.\n",
     "    def parse_d_icd_diagnoses(self, patients: Dict[str, Patient]) -> Dict[str, Patient]: \n",
-    "        # TODO(botelho3) fill this in to join the text descriptions to the visit.\n",
+    "        \"\"\"Helper function which parses D_ICD_DIAGNOSES table.\n",
+    "        Will be called in `self.parse_tables()`\n",
+    "        Docs:\n",
+    "            - D_ICD_DIAGNOSES: https://mimic.mit.edu/docs/iii/tables/d_icd_diagnoses/\n",
+    "        Args:\n",
+    "            patients: a dict of `Patient` objects indexed by patient_id.\n",
+    "        Returns:\n",
+    "            The unchanged patients dict (rows go to `self._text_descriptions`).\n",
+    "        Note:\n",
+    "            N/A\n",
+    "        \"\"\"\n",
+    "        table = \"D_ICD_DIAGNOSES\"\n",
+    "        print(f\"Parsing {table}\")\n",
+    "        assert(table in self._valid_text_tables)\n",
+    "        \n",
+    "        # read table\n",
+    "        df = pd.read_csv(\n",
+    "            os.path.join(self.root, f\"{table}.csv\"),\n",
+    "            usecols=[\"ICD9_CODE\", \"SHORT_TITLE\", \"LONG_TITLE\"],\n",
+    "            dtype={\"ICD9_CODE\": str, \"SHORT_TITLE\": str, \"LONG_TITLE\": str}\n",
+    "        )\n",
+    "        \n",
+    "        # drop rows with missing values\n",
+    "        df = df.dropna(subset=[\"ICD9_CODE\", \"SHORT_TITLE\", \"LONG_TITLE\"])\n",
+    "        # sort rows by ICD9_CODE (ascending)\n",
+    "        df = df.sort_values([\"ICD9_CODE\"], ascending=True)\n",
+    "       \n",
+    "        # print(df.head())\n",
+    "        self._text_descriptions[table] = df.reset_index(drop=True).to_dict(orient='split')\n",
+    "        \n",
+    "        # We haven't altered the patients dict, just return it.\n",
     "        return patients\n",
-    "#                 \"\"\"Helper function which parses DIAGNOSES_ICD table.\n",
-    "#         Will be called in `self.parse_tables()`\n",
-    "#         Docs:\n",
-    "#             - DIAGNOSES_ICD: https://mimic.mit.edu/docs/iii/tables/diagnoses_icd/\n",
-    "#         Args:\n",
-    "#             patients: a dict of `Patient` objects indexed by patient_id.\n",
-    "#         Returns:\n",
-    "#             The updated patients dict.\n",
-    "#         Note:\n",
-    "#             MIMIC-III does not provide specific timestamps in DIAGNOSES_ICD\n",
-    "#                 table, so we set it to None.\n",
-    "#         \"\"\"\n",
-    "#         table = \"DIAGNOSES_ICD\"\n",
-    "#         # read table\n",
-    "#         df = pd.read_csv(\n",
-    "#             os.path.join(self.root, f\"{table}.csv\"),\n",
-    "#             dtype={\"SUBJECT_ID\": str, \"HADM_ID\": str, \"ICD9_CODE\": str},\n",
-    "#         )\n",
-    "#         # drop records of the other patients\n",
-    "#         df = df[df[\"SUBJECT_ID\"].isin(patients.keys())]\n",
-    "#         # drop rows with missing values\n",
-    "#         df = df.dropna(subset=[\"SUBJECT_ID\", \"HADM_ID\", \"ICD9_CODE\"])\n",
-    "#         # sort by sequence number (i.e., priority)\n",
-    "#         df = df.sort_values([\"SUBJECT_ID\", \"HADM_ID\", \"SEQ_NUM\"], ascending=True)\n",
-    "#         # group by patient and visit\n",
-    "#         group_df = df.groupby(\"SUBJECT_ID\")\n",
-    "\n",
-    "#         # parallel unit of diagnosis (per patient)\n",
-    "#         def diagnosis_unit(p_id, p_info):\n",
-    "#             events = []\n",
-    "#             for v_id, v_info in p_info.groupby(\"HADM_ID\"):\n",
-    "#                 for code in v_info[\"ICD9_CODE\"]:\n",
-    "#                     event = Event(\n",
-    "#                         code=code,\n",
-    "#                         table=table,\n",
-    "#                         vocabulary=\"ICD9CM\",\n",
-    "#                         visit_id=v_id,\n",
-    "#                         patient_id=p_id,\n",
-    "#                     )\n",
-    "#                     events.append(event)\n",
-    "#             return events\n",
-    "\n",
-    "#         # parallel apply\n",
-    "#         group_df = group_df.parallel_apply(\n",
-    "#             lambda x: diagnosis_unit(x.SUBJECT_ID.unique()[0], x)\n",
-    "#         )\n",
-    "\n",
-    "#         # summarize the results\n",
-    "#         patients = self._add_events_to_patient_dict(patients, group_df)\n",
-    "#         return patients\n",
+    "    \n",
+    "    def parse_d_labitems(self, patients: Dict[str, Patient]) -> Dict[str, Patient]: \n",
+    "        \"\"\"Helper function which parses D_LABITEMS table.\n",
+    "        Will be called in `self.parse_tables()`\n",
+    "        Docs:\n",
+    "            - D_LABITEMS: https://mimic.mit.edu/docs/iii/tables/d_labitems/\n",
+    "        Args:\n",
+    "            patients: a dict of `Patient` objects indexed by patient_id.\n",
+    "        Returns:\n",
+    "            The unchanged patients dict (rows go to `self._text_descriptions`).\n",
+    "        Note:\n",
+    "            N/A\n",
+    "        \"\"\"\n",
+    "        table = \"D_LABITEMS\"\n",
+    "        print(f\"Parsing {table}\")\n",
+    "        assert(table in self._valid_text_tables)\n",
+    "        \n",
+    "        # read table\n",
+    "        df = pd.read_csv(\n",
+    "            os.path.join(self.root, f\"{table}.csv\"),\n",
+    "            usecols=[\"ITEMID\", \"LABEL\", \"CATEGORY\", \"FLUID\"],\n",
+    "            dtype={\"ITEMID\": str, \"LABEL\": str, \"CATEGORY\": str, \"FLUID\": str}\n",
+    "        )\n",
+    "        \n",
+    "        # drop rows with missing values\n",
+    "        df = df.dropna(subset=[\"ITEMID\", \"LABEL\", \"CATEGORY\", \"FLUID\"])\n",
+    "        # sort rows by ITEMID (ascending)\n",
+    "        df = df.sort_values([\"ITEMID\"], ascending=True)\n",
+    "       \n",
+    "        self._text_descriptions[table] = df.reset_index(drop=True).to_dict(orient='split')\n",
+    "        \n",
+    "        # We haven't altered the patients dict, just return it.\n",
+    "        return patients\n",
+    "    \n",
     "    \n",
     "    def parse_d_items(self, patients: Dict[str, Patient]) -> Dict[str, Patient]: \n",
-    "        # TODO(botelho3) fill this in to join the text descriptions to the visit.\n",
+    "        # TODO(botelho3) - Note this may not be totally usable because the ITEMID\n",
+    "        # unique key only links D_ITEMS to these tables:\n",
+    "        #   - INPUTEVENTS_MV on ITEMID\n",
+    "        #   - OUTPUTEVENTS on ITEMID\n",
+    "        #   - PROCEDUREEVENTS_MV on ITEMID\n",
+    "        #\n",
+    "        # It does not link to the tables we actually want.\n",
+    "\n",
+    "        \"\"\"Helper function which parses D_ITEMS table.\n",
+    "        Will be called in `self.parse_tables()`\n",
+    "        Docs:\n",
+    "            - D_ITEMS: https://mimic.mit.edu/docs/iii/tables/d_items/\n",
+    "        Args:\n",
+    "            patients: a dict of `Patient` objects indexed by patient_id.\n",
+    "        Returns:\n",
+    "            The unchanged patients dict (rows go to `self._text_descriptions`).\n",
+    "        Note:\n",
+    "            N/A\n",
+    "        \"\"\"\n",
+    "        table = \"D_ITEMS\"\n",
+    "        print(f\"Parsing {table}\")\n",
+    "        assert(table in self._valid_text_tables)\n",
+    "        \n",
+    "        # read table\n",
+    "        df = pd.read_csv(\n",
+    "            os.path.join(self.root, f\"{table}.csv\"),\n",
+    "            usecols=[\"ITEMID\", \"LABEL\", \"CATEGORY\"],\n",
+    "            dtype={\"ITEMID\": str, \"LABEL\": str, \"CATEGORY\": str}\n",
+    "        )\n",
+    "        \n",
+    "        # drop rows with missing values\n",
+    "        df = df.dropna(subset=[\"ITEMID\", \"LABEL\", \"CATEGORY\"])\n",
+    "        # sort rows by ITEMID (ascending)\n",
+    "        df = df.sort_values([\"ITEMID\"], ascending=True)\n",
+    "       \n",
+    "        self._text_descriptions[table] = df.reset_index(drop=True).to_dict(orient='split')\n",
+    "        \n",
+    "        # We haven't altered the patients dict, just return it.\n",
     "        return patients\n",
     "    \n",
+    "    \n",
     "    def parse_d_icd_procedures(self, patients: Dict[str, Patient]) -> Dict[str, Patient]: \n",
-    "        # TODO(botelho3) fill this in to join the text descriptions to the visit.\n",
+    "        \"\"\"Helper function which parses D_ICD_PROCEDURES table.\n",
+    "        Will be called in `self.parse_tables()`\n",
+    "        Docs:\n",
+    "            - D_ICD_PROCEDURES: https://mimic.mit.edu/docs/iii/tables/d_icd_procedures/\n",
+    "        Args:\n",
+    "            patients: a dict of `Patient` objects indexed by patient_id.\n",
+    "        Returns:\n",
+    "            The unchanged patients dict (rows go to `self._text_descriptions`).\n",
+    "        Note:\n",
+    "            N/A\n",
+    "        \"\"\"\n",
+    "        table = \"D_ICD_PROCEDURES\"\n",
+    "        print(f\"Parsing {table}\")\n",
+    "        assert(table in self._valid_text_tables)\n",
+    "        \n",
+    "        # read table\n",
+    "        df = pd.read_csv(\n",
+    "            os.path.join(self.root, f\"{table}.csv\"),\n",
+    "            usecols=[\"ICD9_CODE\", \"SHORT_TITLE\", \"LONG_TITLE\"],\n",
+    "            dtype={\"ICD9_CODE\": str, \"SHORT_TITLE\": str, \"LONG_TITLE\": str}\n",
+    "        )\n",
+    "        \n",
+    "        # drop rows with missing values\n",
+    "        df = df.dropna(subset=[\"ICD9_CODE\", \"SHORT_TITLE\", \"LONG_TITLE\"])\n",
+    "        # sort rows by ICD9_CODE (ascending)\n",
+    "        df = df.sort_values([\"ICD9_CODE\"], ascending=True)\n",
+    "       \n",
+    "        # print(df.head())\n",
+    "        self._text_descriptions[table] = df.reset_index(drop=True).to_dict(orient='split')\n",
+    "        \n",
+    "        # We haven't altered the patients dict, just return it.\n",
     "        return patients\n",
     "    "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 96,
    "id": "427a42a6-441e-438c-96f4-b38ba82fd192",
    "metadata": {},
    "outputs": [
@@ -333,11 +416,28 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Parsing PATIENTS and ADMISSIONS: 100%|███████████████████████████████████████████████| 100/100 [00:00<00:00, 540.68it/s]\n",
-      "Parsing PROCEDURES_ICD: 100%|███████████████████████████████████████████████████████| 113/113 [00:00<00:00, 5934.54it/s]\n",
-      "Parsing PRESCRIPTIONS: 100%|██████████████████████████████████████████████████████████| 122/122 [00:01<00:00, 99.08it/s]\n",
-      "Parsing LABEVENTS: 100%|██████████████████████████████████████████████████████████████| 129/129 [00:06<00:00, 21.36it/s]\n",
-      "Mapping codes: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 214.50it/s]\n"
+      "Parsing PATIENTS and ADMISSIONS: 100%|███████████████████████████████████████████████| 100/100 [00:00<00:00, 616.21it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Parsing D_ICD_DIAGNOSES\n",
+      "Parsing D_ICD_PROCEDURES\n",
+      "Parsing D_ITEMS\n",
+      "Parsing D_LABITEMS\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Parsing DIAGNOSES_ICD: 100%|████████████████████████████████████████████████████████| 129/129 [00:00<00:00, 5045.37it/s]\n",
+      "Parsing PROCEDURES_ICD: 100%|███████████████████████████████████████████████████████| 113/113 [00:00<00:00, 6525.72it/s]\n",
+      "Parsing PRESCRIPTIONS: 100%|█████████████████████████████████████████████████████████| 122/122 [00:00<00:00, 136.63it/s]\n",
+      "Parsing LABEVENTS: 100%|██████████████████████████████████████████████████████████████| 129/129 [00:05<00:00, 25.78it/s]\n",
+      "Mapping codes: 100%|█████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 244.02it/s]\n"
      ]
     },
     {
@@ -350,8 +450,11 @@
       "\t- Number of patients: 100\n",
       "\t- Number of visits: 129\n",
       "\t- Number of visits per patient: 1.2900\n",
-      "\t- Number of events per visit in D_ITEMS: 0.0000\n",
+      "\t- Number of events per visit in D_ICD_DIAGNOSES: 0.0000\n",
       "\t- Number of events per visit in D_ICD_PROCEDURES: 0.0000\n",
+      "\t- Number of events per visit in D_ITEMS: 0.0000\n",
+      "\t- Number of events per visit in D_LABITEMS: 0.0000\n",
+      "\t- Number of events per visit in DIAGNOSES_ICD: 13.6512\n",
       "\t- Number of events per visit in PROCEDURES_ICD: 3.9225\n",
       "\t- Number of events per visit in PRESCRIPTIONS: 115.6667\n",
       "\t- Number of events per visit in LABEVENTS: 479.1628\n",
@@ -378,16 +481,64 @@
     "mimic3base = MIMIC3DatasetWrapper(\n",
     "    #root=\"https://storage.googleapis.com/pyhealth/Synthetic_MIMIC-III/\",\n",
     "    root=os.path.join(os.getcwd(), DATA_DIR_),\n",
-    "    tables=[\"D_ITEMS\", \"D_ICD_PROCEDURES\", \"PROCEDURES_ICD\", \"PRESCRIPTIONS\", \"LABEVENTS\",], # \"D_ICD_DIAGNOSES\", \"DIAGNOSES_ICD\"\n",
+    "    tables=[\"D_ICD_DIAGNOSES\", \"D_ICD_PROCEDURES\", \"D_ITEMS\", \"D_LABITEMS\",\n",
+    "            \"DIAGNOSES_ICD\", \"PROCEDURES_ICD\", \"PRESCRIPTIONS\", \"LABEVENTS\"],\n",
     "    # map all NDC codes to ATC 3-rd level codes in these tables\n",
     "    # See https://en.wikipedia.org/wiki/Anatomical_Therapeutic_Chemical_Classification_System.\n",
     "    code_mapping={\"NDC\": (\"ATC\", {\"target_kwargs\": {\"level\": 3}})},\n",
+    "    # Re-parse the tables instead of loading the cached dataset (slow).\n",
+    "    refresh_cache=True,\n",
     ")\n",
     "\n",
     "mimic3base.stat()\n",
     "mimic3base.info()\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "id": "4cef4476-6bc8-4791-b246-4c329a80a2e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dict_keys(['D_ICD_DIAGNOSES', 'D_ITEMS', 'D_ICD_PROCEDURES', 'D_LABITEMS'])\n",
+      "Table: D_ICD_DIAGNOSES\n",
+      "[['0010', 'Cholera d/t vib cholerae', 'Cholera due to vibrio cholerae'], ['0011', 'Cholera d/t vib el tor', 'Cholera due to vibrio cholerae el tor'], ['0019', 'Cholera NOS', 'Cholera, unspecified'], ['0020', 'Typhoid fever', 'Typhoid fever'], ['0021', 'Paratyphoid fever a', 'Paratyphoid fever A']]\n",
+      "\n",
+      "\n",
+      "\n",
+      "Table: D_ITEMS\n",
+      "[['1126', 'Art.pH', 'ABG'], ['1127', 'WBC   (4-11,000)', 'Hematology'], ['1520', 'ACT', 'Coags'], ['1521', 'Albumin', 'Chemistry'], ['1522', 'Calcium', 'Chemistry']]\n",
+      "\n",
+      "\n",
+      "\n",
+      "Table: D_ICD_PROCEDURES\n",
+      "[['0001', 'Ther ult head & neck ves', 'Therapeutic ultrasound of vessels of head and neck'], ['0002', 'Ther ultrasound of heart', 'Therapeutic ultrasound of heart'], ['0003', 'Ther ult peripheral ves', 'Therapeutic ultrasound of peripheral vascular vessels'], ['0009', 'Other therapeutic ultsnd', 'Other therapeutic ultrasound'], ['0010', 'Implant chemothera agent', 'Implantation of chemotherapeutic agent']]\n",
+      "\n",
+      "\n",
+      "\n",
+      "Table: D_LABITEMS\n",
+      "[['50800', 'SPECIMEN TYPE', 'BLOOD', 'BLOOD GAS'], ['50801', 'Alveolar-arterial Gradient', 'Blood', 'Blood Gas'], ['50802', 'Base Excess', 'Blood', 'Blood Gas'], ['50803', 'Calculated Bicarbonate, Whole Blood', 'Blood', 'Blood Gas'], ['50804', 'Calculated Total CO2', 'Blood', 'Blood Gas']]\n",
+      "\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "table_names = mimic3base.get_all_tables()\n",
+    "print(table_names)\n",
+    "\n",
+    "for t in table_names:\n",
+    "    d = mimic3base.get_text_dict(t)\n",
+    "    print(f\"Table: {t}\")\n",
+    "    print(d['data'][:5])\n",
+    "    print('\\n\\n')\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "b02b8e28-23ac-4ac1-a9b6-ae546e28c905",