Uploading EMR Records

To make use of this software, we will first need to upload some medical records to the database. To do this, you can click on the dropdown menu on the top right of the page. From here you can select the "Upload Data" option. This will redirect you to a page where you can select a file with the data from your computer by clicking the "Choose File" button. The allowed file formats for this data are:

  1. CSV (.csv)
  2. Excel (.xlsx)
  3. JSON (.json)
  4. Parquet (.parquet)
  5. Pickle (.pickle or .pkl)
  6. XML (.xml)

The file should contain tabular data with at least the following columns:

  1. patient_id (A unique ID for the patient)
  2. text_id (A unique ID for the medical note)
  3. text (The medical note written by a doctor)
  4. text_date (The date on which this note was recorded)

Reference

The data (notes) are uploaded to MongoDB using the following functions:

Upload Data

This is a flask function for the backend logic to upload a file to the database.

Source code in cedars/app/ops.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
@bp.route("/upload_data", methods=["GET", "POST"])
@auth.admin_required
def upload_data():
    """
    Flask view holding the backend logic for uploading a data file to the database.

    On POST the data comes from one of two places:
      * an object name sent in the "miniofile" form field, or
      * a file sent in the "data_file" field, which is first stored in minio
        under the "uploaded_files/" prefix.
    The resulting object name is then handed to EMR_to_mongodb. Every failure
    is reported with flash() and redirects back to this page.

    On GET (or a POST that ends up with an empty object name) the upload page
    is rendered with the objects currently listed under "uploaded_files/".
    """
    target = None

    if request.method == "POST":
        # A truthy task entry for this user is treated as "upload in progress".
        if db.get_task(f"upload_and_process:{current_user.username}"):
            flash("A file is already being processed.")
            return redirect(request.url)

        selected = request.form.get("miniofile")
        if selected is None or selected == "None":
            # No existing object chosen: expect a file in the request body.
            if 'data_file' not in request.files:
                flash('No file part')
                return redirect(request.url)

            upload = request.files['data_file']
            if upload.filename == '':
                flash('No selected file')
                return redirect(request.url)
            if upload and not allowed_data_file(upload.filename):
                flash("Invalid file type. Please upload a .csv, .xlsx, .json, .parquet, .pickle, .pkl, or .xml file.")
                return redirect(request.url)

            target = f"uploaded_files/{secure_filename(upload.filename)}"
            # put_object is given the byte length of the stream explicitly.
            byte_count = os.fstat(upload.fileno()).st_size
            try:
                minio.put_object(g.bucket_name, target, upload, byte_count)
                logger.info(f"File - {upload.filename} uploaded successfully.")
                flash(f"{target} uploaded successfully.")
            except Exception as err:
                target = None
                flash(f"Failed to upload file: {str(err)}")
                return redirect(request.url)
        else:
            logger.info(f"Using minio file: {selected}")
            target = selected

        if target:
            try:
                EMR_to_mongodb(target)
                flash(f"Data from {target} uploaded to the database.")
                return redirect(url_for('ops.upload_query'))
            except Exception as err:
                flash(f"Failed to upload data: {str(err)}")
                return redirect(request.url)

    # Collect (name, size) pairs for the upload page; on any listing error
    # the page is still rendered, just with an empty list.
    files = []
    try:
        for stored in minio.list_objects(g.bucket_name, prefix="uploaded_files/"):
            files.append((stored.object_name, stored.size))
    except Exception as err:
        flash(f"Error listing files: {err}")
        files = []

    return render_template("ops/upload_file.html", files=files, **db.get_info())

Upload EMR data to MongoDB

This function is used to open a data file in one of the supported formats (e.g. a csv file) and load its contents into the MongoDB database.

Parameters:
  • filepath (str) –

    The path to the file to load data from.

Returns: None

Source code in cedars/app/ops.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
def EMR_to_mongodb(filepath):  # pylint: disable=C0103
    """
    This function is used to open a data file and load its contents into the mongodb database.

    The file is read with load_pandas_dataframe; if that returns None nothing
    is uploaded. Otherwise the rows are uploaded one patient at a time via
    db.upload_notes, and progress is logged after every 100 patients.

    Args:
        filepath (str) : The path to the file to load data from.
        For valid file extensions refer to the allowed_data_file function above.
    Returns:
        None
    """

    data_frame = load_pandas_dataframe(filepath)
    if data_frame is None:
        return

    logger.info(f"columns in dataframe:\n {data_frame.columns}")
    logger.debug(data_frame.head())
    id_list = data_frame["patient_id"].unique()
    logger.info("Starting document migration to mongodb database.")
    # Count from 1 so the progress check is a plain modulo. The previous
    # `i+1 % 100 == 0` parsed as `i + (1 % 100) == 0` and never logged.
    for count, p_id in enumerate(id_list, start=1):
        documents = data_frame[data_frame["patient_id"] == p_id]
        db.upload_notes(documents)
        if count % 100 == 0:
            logger.info(f"Documents uploaded for patient #{count}")

    logger.info("Completed document migration to mongodb database.")