ehrapy copied to clipboard
Integrate Synthea or create own fake data generator
We should try to wrap it in Python (checking that a JDK etc. is available first).
The default output should be CSV, but other formats should be supported as well.
CC #102
java -jar synthea-with-dependencies.jar -p 5 -c synthea.properties
⋊> ~/Desktop cat synthea.properties
exporter.ccda.export = false
exporter.fhir.export = true
exporter.csv.export = true
With this configuration, Synthea generates CSV files.
This could also be a cool option:
import random
import uuid
from datetime import datetime, timedelta
from faker import Faker
fake = Faker()
# Field names carried by each example FHIR resource type.
patient_fields = ["id", "birthDate", "gender", "address", "phone"]
observation_fields = [
    "id",
    "status",
    "category",
    "code",
    "subject",
    "issued",
    "valueQuantity",
]
condition_fields = [
    "id",
    "clinicalStatus",
    "category",
    "code",
    "subject",
    "onsetDateTime",
]

# Pools of example values that the generators sample from.
gender_values = ["male", "female", "other"]
category_values = ["vital-signs", "laboratory", "imaging", "procedure"]
code_values = ["1000001", "1000002", "1000003", "1000004", "1000005"]
clinical_status_values = ["active", "resolved", "remission", "relapse"]
def generate_patient():
    """Generate a fake FHIR Patient resource.

    Returns:
        dict: A Patient resource with a random UUID ``id``, an ISO-8601
        ``birthDate`` string, a ``gender`` drawn from ``gender_values``,
        a nested ``address`` dict and a top-level ``phone`` string.
    """
    patient = {
        'resourceType': 'Patient',
        'id': str(uuid.uuid4()),
        # Store the date as an ISO string so the resource is JSON-serialisable.
        'birthDate': fake.date_of_birth().isoformat(),
        'gender': random.choice(gender_values),
        'address': {
            'line': fake.street_address(),
            'state': fake.state_abbr(),
            'postalCode': fake.zipcode(),
        },
        # 'phone' is listed as a top-level field in patient_fields, so it
        # sits next to 'address' rather than inside it.
        'phone': fake.phone_number(),
    }
    return patient
def generate_observation(patient_id):
    """Generate a fake FHIR Observation resource.

    Args:
        patient_id: Identifier of the patient the observation refers to;
            it is embedded as ``Patient/<patient_id>`` in ``subject``.

    Returns:
        dict: An Observation resource with status ``'final'``, a category
        drawn from ``category_values``, a code drawn from ``code_values``
        and an integer ``valueQuantity`` between 1 and 100 in mmHg.
    """
    observation = {
        'resourceType': 'Observation',
        'id': str(uuid.uuid4()),
        'status': 'final',
        'category': {
            'coding': [{
                # NOTE(review): the coding-system strings are empty in the
                # source; they look like stripped URLs -- confirm.
                'system': '',
                'code': random.choice(category_values),
            }],
        },
        'code': {
            'coding': [{
                'system': '',
                'code': random.choice(code_values),
                'display': 'Example Code',
            }],
        },
        'subject': {
            'reference': f'Patient/{patient_id}',
        },
        'valueQuantity': {
            'value': random.randint(1, 100),
            'unit': 'mmHg',
            'system': '',
            'code': 'mm[Hg]',
        },
    }
    return observation
def generate_condition(patient_id):
    """Generate a fake FHIR Condition resource.

    Args:
        patient_id: Identifier of the patient the condition refers to;
            it is embedded as ``Patient/<patient_id>`` in ``subject``.

    Returns:
        dict: A Condition resource with a clinical status drawn from
        ``clinical_status_values``, a category drawn from
        ``category_values``, a code drawn from ``code_values`` and an
        ISO-8601 ``onsetDateTime`` between 1 and 3650 days in the past.
    """
    # The left operand of the subtraction was missing in the source; the
    # onset is anchored to the current time.
    onset = datetime.now() - timedelta(days=random.randint(1, 3650))
    condition = {
        'resourceType': 'Condition',
        'id': str(uuid.uuid4()),
        'clinicalStatus': random.choice(clinical_status_values),
        'category': {
            'coding': [{
                'system': '',
                'code': random.choice(category_values),
            }],
        },
        'code': {
            'coding': [{
                'system': '',
                'code': random.choice(code_values),
                'display': 'Example Code',
            }],
        },
        'subject': {
            'reference': f'Patient/{patient_id}',
        },
        'onsetDateTime': onset.isoformat(),
    }
    return condition
# Generate some example FHIR data: one patient, plus one observation and one
# condition that both reference that patient through its 'id'.
patient = generate_patient()
observation = generate_observation(patient['id'])
condition = generate_condition(patient['id'])
A more complex version:
import random
import uuid
from datetime import datetime, timedelta
from faker import Faker
from itertools import cycle
fake = Faker()
# Field names carried by each example FHIR resource type.
patient_fields = ["id", "birthDate", "gender", "address", "phone"]
observation_fields = [
    "id", "status", "category", "code", "subject", "issued", "valueQuantity",
]
condition_fields = [
    "id", "clinicalStatus", "category", "code", "subject", "onsetDateTime",
]
encounter_fields = [
    "id", "status", "class", "type", "subject", "period", "participant",
]
medication_request_fields = [
    "id", "status", "medication", "subject", "authoredOn", "dosageInstruction",
]
medication_dispense_fields = [
    "id",
    "status",
    "medicationReference",
    "subject",
    "whenPrepared",
    "dosageInstruction",
]
procedure_fields = [
    "id", "status", "code", "subject", "performedDateTime", "performer",
]

# Pools of example values that the generators sample from.
gender_values = ["male", "female", "other"]
race_values = ["white", "black", "asian", "hispanic", "other"]
ethnicity_values = ["nonhispanic", "hispanic", "unknown"]
category_values = ["vital-signs", "laboratory", "imaging", "procedure"]
code_values = ["1000001", "1000002", "1000003", "1000004", "1000005"]
clinical_status_values = ["active", "resolved", "remission", "relapse"]
encounter_status_values = [
    "planned",
    "arrived",
    "triaged",
    "in-progress",
    "on-leave",
    "finished",
    "cancelled",
]
encounter_class_values = [
    "ambulatory", "emergency", "inpatient", "outpatient", "urgentcare",
]
encounter_type_values = [
    "office-visit", "emergency", "inpatient", "outpatient", "urgentcare",
]
medication_status_values = [
    "active", "completed", "cancelled", "on-hold", "stopped", "draft",
]
procedure_status_values = [
    "preparation",
    "in-progress",
    "not-done",
    "on-hold",
    "stopped",
    "completed",
    "entered-in-error",
    "unknown",
]
procedure_code_values = ["1010001", "1010002", "1010003", "1010004", "1010005"]
procedure_performer_values = ["primary", "assistant", "nurse", "technician"]
# Example medications, keyed by name, each with a default dosage:
# `quantity` x `unit`, taken `frequency` times per `period`.
medication_values = {
    'atorvastatin': {
        'dosage': {
            'quantity': 1,
            'unit': 'tablet',
            'frequency': 1,
            'period': 'day',
        },
    },
    'metoprolol': {
        'dosage': {
            'quantity': 1,
            'unit': 'tablet',
            'frequency': 2,
            'period': 'day',
        },
    },
    'lisinopril': {
        'dosage': {
            'quantity': 1,
            'unit': 'tablet',
            'frequency': 1,
            'period': 'day',
        },
    },
}
def generate_patient():
    """Generate a fake FHIR Patient resource.

    Returns:
        dict: A Patient resource with a random UUID ``id``, ``meta`` and
        ``text`` blocks, an ISO-8601 ``birthDate``, a ``gender`` drawn from
        ``gender_values``, a one-element ``address`` list and a one-element
        ``phone`` list.
    """
    patient = {
        'resourceType': 'Patient',
        'id': str(uuid.uuid4()),
        'meta': {
            'versionId': '1',
        },
        'text': {
            'status': 'generated',
            # NOTE(review): the xmlns value is empty in the source; it looks
            # like a stripped namespace URL -- confirm before relying on it.
            'div': '<div xmlns=""></div>',
        },
        'birthDate': fake.date_of_birth().isoformat(),
        'gender': random.choice(gender_values),
        'address': [{
            'use': 'home',
            'type': 'postal',
            'text': fake.address(),
            'line': [fake.street_address()],
            'district': fake.state(),
            'postalCode': fake.postcode(),
        }],
        'phone': [{
            'system': 'phone',
            'value': fake.phone_number(),
            'use': 'home',
        }],
    }
    return patient
def generate_observation(patient_id, category=None, code=None, value=None):
    """Generate a fake FHIR Observation resource.

    Args:
        patient_id: Identifier of the patient the observation refers to;
            it is embedded as ``Patient/<patient_id>`` in ``subject``.
        category: Observation category code. A falsy value selects a random
            entry from ``category_values``.
        code: Observation code. A falsy value selects a random entry from
            ``code_values``.
        value: Measured value. ``None`` selects a random float in
            ``[1, 100]`` rounded to two decimals; an explicit ``0`` is kept.

    Returns:
        dict: The Observation resource.
    """
    if not category:
        category = random.choice(category_values)
    if not code:
        code = random.choice(code_values)
    # Compare against None so that a measured value of 0 is not replaced
    # by a random number.
    if value is None:
        value = round(random.uniform(1, 100), 2)
    observation = {
        'resourceType': 'Observation',
        'id': str(uuid.uuid4()),
        'status': 'final',
        'category': {
            'coding': [{
                'system': '',
                'code': category,
                'display': category.capitalize(),
            }],
        },
        'code': {
            'coding': [{
                'system': '',
                'code': code,
                'display': fake.word(),
            }],
        },
        'subject': {
            'reference': f'Patient/{patient_id}',
        },
        'valueQuantity': {
            'value': value,
            'unit': random.choice(['mg/dL', 'mmol/L', 'kg', 'cm', 'mmHg', 'bpm']),
        },
    }
    return observation
def generate_condition(patient_id, category=None, code=None, onset=None):
    """Generate a fake FHIR Condition resource.

    Args:
        patient_id: Identifier of the patient the condition refers to;
            it is embedded as ``Patient/<patient_id>`` in ``subject``.
        category: Condition category code. A falsy value selects a random
            entry from ``category_values``.
        code: Condition code. A falsy value selects a random entry from
            ``code_values``.
        onset: Onset timestamp, stored unchanged in ``onsetDateTime``. A
            falsy value selects a random ISO-8601 timestamp within the
            last 50 years.

    Returns:
        dict: The Condition resource.
    """
    if not category:
        category = random.choice(category_values)
    if not code:
        code = random.choice(code_values)
    if not onset:
        onset = fake.date_time_between(start_date='-50y', end_date='now').isoformat()
    condition = {
        'resourceType': 'Condition',
        'id': str(uuid.uuid4()),
        'clinicalStatus': random.choice(clinical_status_values),
        'category': {
            'coding': [{
                'system': '',
                'code': category,
                'display': category.capitalize(),
            }],
        },
        'code': {
            'coding': [{
                'system': '',
                'code': code,
                'display': fake.word(),
            }],
        },
        'subject': {
            'reference': f'Patient/{patient_id}',
        },
        'onsetDateTime': onset,
    }
    return condition
def generate_encounter(patient_id, type=None, start=None, end=None):
    """Generate a fake FHIR Encounter resource.

    Args:
        patient_id: Identifier of the patient the encounter refers to; it
            is embedded as ``Patient/<patient_id>`` in both ``subject`` and
            the single ``participant`` entry.
        type: Encounter type code. A falsy value selects a random entry
            from ``encounter_type_values``. The name shadows the builtin
            but is kept so keyword callers continue to work.
        start: ISO-8601 start timestamp string. A falsy value selects a
            random timestamp within the last 50 years.
        end: ISO-8601 end timestamp string. A falsy value selects
            ``start`` plus a random 10-360 minutes.

    Returns:
        dict: The Encounter resource, always with status ``'finished'``
        and class ``'AMB'`` / ``'ambulatory'``.
    """
    if not type:
        type = random.choice(encounter_type_values)
    if not start:
        start = fake.date_time_between(start_date='-50y', end_date='now').isoformat()
    if not end:
        # `start` is parsed back from its ISO form, so it must be a string.
        duration = timedelta(minutes=random.randint(10, 360))
        end = (datetime.fromisoformat(start) + duration).isoformat()
    encounter = {
        'resourceType': 'Encounter',
        'id': str(uuid.uuid4()),
        'status': 'finished',
        'class': {
            'system': '',
            'code': 'AMB',
            'display': 'ambulatory',
        },
        'type': [{
            'coding': [{
                'system': '',
                'code': type,
                'display': type.capitalize(),
            }],
        }],
        'subject': {
            'reference': f'Patient/{patient_id}',
        },
        'period': {
            'start': start,
            'end': end,
        },
        'participant': [{
            'individual': {
                'reference': f'Patient/{patient_id}',
            },
        }],
    }
    return encounter
# Intent codes from the FHIR MedicationRequest intent value set; used when
# the caller does not supply an intent.
_MEDICATION_INTENT_VALUES = (
    'proposal',
    'plan',
    'order',
    'original-order',
    'reflex-order',
    'filler-order',
    'instance-order',
    'option',
)


def generate_medication_request(patient_id, medication=None, status=None, intent=None):
    """Generate a fake FHIR MedicationRequest resource.

    Args:
        patient_id: Identifier of the patient the request refers to; it is
            embedded as ``Patient/<patient_id>`` in ``subject``.
        medication: Medication code. A falsy value selects a random key of
            ``medication_values``.
        status: Request status. A falsy value selects a random entry from
            ``medication_status_values``.
        intent: Request intent. A falsy value selects a random FHIR intent
            code.

    Returns:
        dict: The MedicationRequest resource with one dosage instruction.
    """
    if not medication:
        # medication_values is a dict; random.choice needs a sequence, so
        # sample from its keys (the medication names).
        medication = random.choice(list(medication_values))
    if not status:
        status = random.choice(medication_status_values)
    if not intent:
        intent = random.choice(_MEDICATION_INTENT_VALUES)
    # Draw code and display together so they always describe the same route.
    # NOTE(review): the pairing follows the order of the two original lists;
    # verify the codes against SNOMED CT.
    route_code, route_display = random.choice([
        ('26643006', 'Oral'),
        ('255560000', 'Injection'),
        ('254790003', 'Topical'),
    ])
    medication_request = {
        'resourceType': 'MedicationRequest',
        'id': str(uuid.uuid4()),
        'status': status,
        'intent': intent,
        'subject': {
            'reference': f'Patient/{patient_id}',
        },
        'medicationCodeableConcept': {
            'coding': [{
                'system': '',
                'code': medication,
                'display': fake.word(),
            }],
        },
        'dosageInstruction': [{
            'sequence': 1,
            'text': fake.sentence(nb_words=6),
            'timing': {
                'repeat': {
                    'frequency': random.randint(1, 3),
                    'period': random.randint(1, 5),
                    'periodUnit': 'd',
                },
            },
            'route': {
                'coding': [{
                    'system': '',
                    'code': route_code,
                    'display': route_display,
                }],
            },
        }],
    }
    return medication_request
def generate_all_resources(num_patients):
    """Generate a list of all FHIR resources for the specified number of patients.

    For every patient this creates the Patient resource itself plus 5-20
    observations, 1-5 conditions, 1-3 encounters and 1-5 medication
    requests, all referencing that patient's ``id``.

    Args:
        num_patients: Number of patients to generate.

    Returns:
        list: All resources, grouped by type in the order patients,
        observations, conditions, encounters, medication requests.
    """
    patients = []
    observations = []
    conditions = []
    encounters = []
    medication_requests = []
    for _ in range(num_patients):
        patient = generate_patient()
        patients.append(patient)
        # Use the generated UUID (not a loop counter) so that every
        # 'Patient/<id>' reference resolves to a resource in the result.
        patient_id = patient['id']
        observations.extend(
            generate_observation(patient_id) for _ in range(random.randint(5, 20))
        )
        conditions.extend(
            generate_condition(patient_id) for _ in range(random.randint(1, 5))
        )
        encounters.extend(
            generate_encounter(patient_id) for _ in range(random.randint(1, 3))
        )
        medication_requests.extend(
            generate_medication_request(patient_id) for _ in range(random.randint(1, 5))
        )
    return patients + observations + conditions + encounters + medication_requests
import pytest
def test_generate_all_resources():
    """Check the size and the required fields of the generated resources."""
    num_patients = 10
    resources = generate_all_resources(num_patients)

    # Per-patient counts are random, so only the bounds can be asserted:
    # 1 patient + 5-20 observations + 1-5 conditions + 1-3 encounters
    # + 1-5 medication requests.
    min_per_patient = 1 + 5 + 1 + 1 + 1
    max_per_patient = 1 + 20 + 5 + 3 + 5
    assert num_patients * min_per_patient <= len(resources) <= num_patients * max_per_patient

    # Fields that must be present on each resource type.
    required_fields = {
        'Patient': ('birthDate', 'gender'),
        'Observation': ('valueQuantity', 'code'),
        'Condition': ('code', 'subject'),
        'Encounter': ('class', 'subject'),
        # The generator emits 'medicationCodeableConcept', not
        # 'medicationReference'.
        'MedicationRequest': ('medicationCodeableConcept', 'subject'),
    }
    for resource in resources:
        resource_type = resource['resourceType']
        assert resource_type in required_fields
        for field in required_fields[resource_type]:
            assert resource.get(field) is not None
There is a pipeline that first uses Synthea to generate CSV files and then uses a Synthea R package to create OMOP files.