Data Collection (interviews, observations)
|
v
Open Coding: Line-by-line coding of raw data
|
v
Axial Coding: Grouping codes into categories,
identifying relationships
|
v
Selective Coding: Identifying the core category
that integrates all others
|
v
Theoretical Saturation: Stop when new data
no longer generates new codes
|
v
Substantive Theory: A grounded explanation of the phenomenon
def create_interview_protocol(research_questions: list[str],
                              n_questions: int = 10) -> dict:
    """
    Generate a semi-structured interview protocol template.

    For each research question, three scaffold interview questions are
    produced (grand tour, follow-up probe, example-seeking probe).

    Args:
        research_questions: The study's research questions.
        n_questions: Target number of interview questions. Recorded in the
            returned protocol as ``target_n_questions`` so the researcher
            can compare it against the generated count.

    Returns:
        dict with keys 'opening', 'main_questions', 'closing',
        'guidelines', and 'target_n_questions'. 'main_questions' holds one
        entry per research question, each mapping the RQ text to its three
        scaffold interview questions.
    """
    protocol = {
        'opening': {
            'rapport_building': [
                "Thank you for participating. Before we begin, could you "
                "tell me a little about yourself and your background?",
                "How did you first become involved in [topic]?"
            ],
            'time_estimate': '60-90 minutes'
        },
        'main_questions': [],
        'closing': {
            'wrap_up': [
                "Is there anything else you would like to share that we "
                "have not covered?",
                "Looking back, what stands out most to you about [topic]?",
                "Do you have any questions for me?"
            ]
        },
        'guidelines': [
            'Ask open-ended questions (how, what, tell me about)',
            'Avoid leading questions',
            'Use probes: "Can you give me an example?"',
            'Use follow-ups: "You mentioned X, tell me more about that"',
            'Allow silences -- do not rush to fill pauses',
            'Record field notes immediately after each interview'
        ],
        # Fix: the n_questions parameter was previously accepted but never
        # used; surface it in the protocol so callers can act on it.
        'target_n_questions': n_questions
    }
    # Generate a three-question scaffold per research question (1-indexed
    # RQ labels, matching how RQs are numbered in write-ups).
    for i, rq in enumerate(research_questions):
        protocol['main_questions'].append({
            'research_question': rq,
            'interview_questions': [
                f'Grand tour question for RQ{i+1}',
                f'Follow-up probe for RQ{i+1}',
                f'Example-seeking probe for RQ{i+1}'
            ]
        })
    return protocol
def thematic_analysis_workflow(transcripts: list[str]) -> dict:
    """
    Outline the six phases of reflexive thematic analysis.

    Args:
        transcripts: Interview transcripts to be analyzed; only the count
            is used (for the time estimate).

    Returns:
        dict with 'phases' (phase name -> actions/output), 'n_transcripts',
        and a rough 'estimated_time' range (4-8 hours per transcript).
    """
    # Spec table: (phase key, actions, output) -- keeps the six phases
    # visually parallel and easy to extend.
    phase_specs = (
        ('phase_1_familiarization',
         ['Read and re-read all transcripts',
          'Note initial impressions in a research journal',
          'Transcribe recordings if not already done'],
         'Familiarity with data, initial notes'),
        ('phase_2_coding',
         ['Code every data segment systematically',
          'Use open coding (inductive) or deductive codes from framework',
          'Code inclusively -- same segment can have multiple codes',
          'Maintain a codebook with definitions and examples'],
         'Coded dataset, codebook'),
        ('phase_3_generating_themes',
         ['Collate codes into potential themes',
          'Create a thematic map showing relationships',
          'Distinguish between semantic and latent themes'],
         'Candidate themes and sub-themes'),
        ('phase_4_reviewing_themes',
         ['Check themes against coded extracts',
          'Check themes against entire dataset',
          'Merge, split, or discard themes as needed'],
         'Refined thematic map'),
        ('phase_5_defining_themes',
         ['Write a detailed description of each theme',
          'Identify the essence of each theme',
          'Name themes concisely and informatively'],
         'Theme definitions and names'),
        ('phase_6_writing_up',
         ['Weave together analytic narrative and data extracts',
          'Select vivid, compelling quotes for each theme',
          'Connect themes to research questions and literature'],
         'Final analysis write-up'),
    )
    phases = {
        key: {'actions': actions, 'output': output}
        for key, actions, output in phase_specs
    }
    count = len(transcripts)
    return {
        'phases': phases,
        'n_transcripts': count,
        'estimated_time': f'{count * 4}-{count * 8} hours',
    }