Skip to content
Snippets Groups Projects
Commit f162a0eb authored by Christoph Steindl's avatar Christoph Steindl
Browse files

Add notebooks for 2023-12-04

parents
No related branches found
No related tags found
No related merge requests found
.venv
yolo*
\ No newline at end of file
%% Cell type:markdown id: tags:
Download various data formats from the internet using the Python `requests` library and save them to disk.
Data formats:
- XML
- CSV
- Image
%% Cell type:code id: tags:
``` python
import requests
from pathlib import Path
# Folder that receives the files downloaded by the cells of this notebook.
data_folder = Path('data')
# exist_ok=True: re-running this cell does not fail if the folder already exists.
data_folder.mkdir(exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# XML
# TEI-XML file from the Blotius edition (https://edition.onb.ac.at/blotius)
xml_data = requests.get(
    'https://edition.onb.ac.at/blotius/o:blo.inventar-mz/datastreams/TEI_SOURCE/content',
    timeout=60,  # without a timeout, requests may wait forever on a stalled server
)
# Stop on an HTTP error (4xx/5xx) so that an error page is never saved as data.
xml_data.raise_for_status()
# Binary mode: the response bytes are stored exactly as received.
with open(data_folder / 'tei-xml_example.xml', 'wb') as file:
    file.write(xml_data.content)
```
%% Cell type:code id: tags:
``` python
# CSV
# Historic postcards metadata sheet (bz2-compressed CSV)
csv_data = requests.get(
    'https://labs.onb.ac.at/gitlab/labs-team/raw-metadata/-/raw/master/akon_postcards_public_domain.csv.bz2?ref_type=heads&inline=false',
    timeout=60,  # without a timeout, requests may wait forever on a stalled server
)
# Stop on an HTTP error (4xx/5xx) so that an error page is never saved as data.
csv_data.raise_for_status()
# Binary mode: the compressed bytes are stored exactly as received.
with open(data_folder / 'csv_example.csv.bz2', 'wb') as file:
    file.write(csv_data.content)
```
%% Cell type:code id: tags:
``` python
# Image
# Stock image from pexels.com
img_data = requests.get(
    'https://images.pexels.com/photos/572056/pexels-photo-572056.jpeg?cs=srgb&dl=pexels-serpstat-572056.jpg&fm=jpg&w=5664&h=3778',
    timeout=60,  # without a timeout, requests may wait forever on a stalled server
)
# Stop on an HTTP error (4xx/5xx) so that an error page is never saved as an image.
img_data.raise_for_status()
# Binary mode: the image bytes are stored exactly as received.
with open(data_folder / 'image_example.jpg', 'wb') as file:
    file.write(img_data.content)
```
%% Cell type:markdown id: tags:
Process the TEI-XML file using the `lxml` library with basic queries.
%% Cell type:code id: tags:
``` python
from lxml import etree as et
from pathlib import Path
# Input folder with the downloaded files.
data_folder = Path('data')

# Output folder for processed files; created on first run, reused afterwards.
output_folder = Path('output')
output_folder.mkdir(exist_ok=True)

# Prefix-to-URI mapping passed to the XPath queries so that 'tei:' resolves
# to the TEI namespace.
namespaces = {'tei': 'http://www.tei-c.org/ns/1.0'}
```
%% Cell type:code id: tags:
``` python
# Parse the downloaded TEI file into an element tree.
blotius_xml = et.parse(
    data_folder / 'tei-xml_example.xml',
)
```
%% Cell type:code id: tags:
``` python
# Print every rs element whose type is not "place".
for rs in blotius_xml.xpath('.//tei:rs', namespaces=namespaces):
    # .get() returns None when the attribute is missing, so an rs element
    # without a type attribute is printed instead of raising KeyError.
    if rs.get('type') != 'place':
        print(rs)
```
%% Cell type:code id: tags:
``` python
# Select all TEI div elements marked as entries and report how many there are.
entry_query = './/tei:div[@type = "entry"]'
entries = blotius_xml.xpath(entry_query, namespaces=namespaces)
print(len(entries))
```
%% Cell type:code id: tags:
``` python
# Tag every entry element with subtype="catalogue" (modifies the tree in place).
for entry in entries:
    entry.set('subtype', 'catalogue')
```
%% Cell type:code id: tags:
``` python
# save manipulated xml to disk
# lxml's write() defaults to ASCII output without an XML declaration, which
# turns every non-ASCII character into a numeric character reference.
# Writing UTF-8 with a declaration keeps the text readable in the saved file.
blotius_xml.write(
    output_folder / 'tei-xml_example.xml',
    encoding='UTF-8',
    xml_declaration=True,
)
```
%% Cell type:markdown id: tags:
Process csv data using *pandas* data frames
%% Cell type:code id: tags:
``` python
import pandas as pd
from pathlib import Path
from ultralytics import YOLO
# Input folder (read by the cells below) and output folder for the exports.
data_folder = Path('data')
output_folder = Path('output')
# exist_ok=True: re-running this cell does not fail if the folder already exists.
output_folder.mkdir(exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# Load the bz2-compressed CSV into a data frame and preview its first rows.
compressed_csv = data_folder / 'csv_example.csv.bz2'
akon_data = pd.read_csv(compressed_csv, compression='bz2')
print(akon_data.head())
```
%% Cell type:code id: tags:
``` python
# Export the full data frame, uncompressed, in two formats:
# an Excel workbook and a plain CSV dump.
excel_target = output_folder / 'csv_example.xlsx'
csv_target = output_folder / 'csv_example.csv'
akon_data.to_excel(excel_target)
akon_data.to_csv(csv_target)
```
%% Cell type:code id: tags:
``` python
# Keep only the rows whose 'color' value equals False
# (presumably the black-and-white postcards - verify against the data),
# export them to Excel and preview the first rows.
is_not_color = akon_data['color'].eq(False)
akon_bw = akon_data.loc[is_not_color]
akon_bw.to_excel(output_folder / 'csv_example_bw.xlsx')
print(akon_bw.head())
```
%% Cell type:code id: tags:
``` python
# Keep only the rows with country_id 'DE' and a year below 1900,
# export them to Excel and preview the first rows.
from_de = akon_data['country_id'].eq('DE')
before_1900 = akon_data['year'].lt(1900)
akon_de = akon_data.loc[from_de & before_1900]
akon_de.to_excel(output_folder / 'csv_example_de.xlsx')
print(akon_de.head())
```
%% Cell type:markdown id: tags:
Process images using Python Pillow library
%% Cell type:code id: tags:
``` python
from PIL import Image, ImageDraw, ImageFilter
from ultralytics import YOLO
from pathlib import Path
# Input folder (read by the cells below) and output folder for the results.
data_folder = Path('data')
output_folder = Path('output')
# exist_ok=True: re-running this cell does not fail if the folder already exists.
output_folder.mkdir(exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# Open the downloaded stock photo with Pillow.
stock_image = Image.open(
    data_folder / 'image_example.jpg',
)
```
%% Cell type:code id: tags:
``` python
# Show the image's format, size and mode attributes.
print(stock_image.format, stock_image.size, stock_image.mode)
```
%% Cell type:code id: tags:
``` python
# The drawing context paints directly onto stock_image, so the image
# itself is modified by the calls below.
stock_image_rect = ImageDraw.Draw(stock_image)

# Line from the top-left corner (0, 0) to the point given by the image size.
width, height = stock_image.size
stock_image_rect.line((0, 0, width, height), fill=128)

# Filled rectangle between two fixed corner points.
top_left, bottom_right = (300, 500), (1100, 900)
stock_image_rect.rectangle([top_left, bottom_right], fill=128)

stock_image.save(output_folder / 'image_example_rect.png', format='PNG')

# Value of a single pixel at a fixed position, read after the drawing above.
sample_pixel = stock_image.getpixel((3000, 2500))
print(sample_pixel)
# excursus digital image representation
```
%% Cell type:code id: tags:
``` python
def _save_png(image, file_name):
    """Save *image* as a PNG file named *file_name* inside output_folder."""
    image.save(output_folder / file_name, 'PNG')


# Reload the photo from disk, then derive and save one variant after another.
stock_image = Image.open(data_folder / 'image_example.jpg')

# Cut-out of the box given as four coordinates.
stock_image_crop = stock_image.crop((200, 130, 500, 700))
_save_png(stock_image_crop, 'image_example_crop.png')

# Conversion to mode 'L'.
stock_image_grey = stock_image.convert('L')
_save_png(stock_image_grey, 'image_example_grey.png')

# One variant per built-in Pillow filter.
stock_image_edges = stock_image.filter(ImageFilter.FIND_EDGES)
_save_png(stock_image_edges, 'image_example_edges.png')

stock_image_blur = stock_image.filter(ImageFilter.BLUR)
_save_png(stock_image_blur, 'image_example_blur.png')

stock_image_contour = stock_image.filter(ImageFilter.CONTOUR)
_save_png(stock_image_contour, 'image_example_contour.png')

stock_image_edge_enhance = stock_image.filter(ImageFilter.EDGE_ENHANCE)
_save_png(stock_image_edge_enhance, 'image_example_edge_enhance.png')

stock_image_sharpen = stock_image.filter(ImageFilter.SHARPEN)
_save_png(stock_image_sharpen, 'image_example_sharpen.png')

stock_image_emboss = stock_image.filter(ImageFilter.EMBOSS)
_save_png(stock_image_emboss, 'image_example_emboss.png')

stock_image_smooth = stock_image.filter(ImageFilter.SMOOTH)
_save_png(stock_image_smooth, 'image_example_smooth.png')

stock_image_detail = stock_image.filter(ImageFilter.DETAIL)
_save_png(stock_image_detail, 'image_example_detail.png')
```
%% Cell type:markdown id: tags:
Classify image using ultralytics
%% Cell type:code id: tags:
``` python
# Object detection: run the model on the stock photo and save the
# annotated picture.
model = YOLO('yolov8n.pt')  # load an official model
source_image = data_folder / 'image_example.jpg'
results = model(source_image)
for r in results:
    plotted = r.plot()
    # Reverse the last axis before handing the array to Pillow
    # (presumably BGR -> RGB channel order - confirm in the ultralytics docs).
    reversed_channels = plotted[..., ::-1]
    Image.fromarray(reversed_channels).save(
        output_folder / 'image_example_ultralytics_detect.png'
    )
```
%% Cell type:code id: tags:
``` python
# Image classification: run the classification model on the stock photo
# and save the annotated picture.
model = YOLO('yolov8n-cls.pt')  # load an official model
source_image = data_folder / 'image_example.jpg'
results = model(source_image)
for r in results:
    plotted = r.plot()
    # Reverse the last axis before handing the array to Pillow
    # (presumably BGR -> RGB channel order - confirm in the ultralytics docs).
    reversed_channels = plotted[..., ::-1]
    Image.fromarray(reversed_channels).save(
        output_folder / 'image_example_ultralytics_predict.png'
    )
```
%% Cell type:code id: tags:
``` python
# Image segmentation: run the segmentation model on the stock photo,
# print each result and save the annotated picture.
model = YOLO('yolov8n-seg.pt')  # load an official model
source_image = data_folder / 'image_example.jpg'
results = model(source_image)
for r in results:
    print(r)
    plotted = r.plot()
    # Reverse the last axis before handing the array to Pillow
    # (presumably BGR -> RGB channel order - confirm in the ultralytics docs).
    reversed_channels = plotted[..., ::-1]
    Image.fromarray(reversed_channels).save(
        output_folder / 'image_example_ultralytics_segment.png'
    )
```
%% Cell type:markdown id: tags:
# task
- download an image
- object detection
- save detected objects as snippets
# Python Introduction
2023W 070128-1 Methodological course - Analyzing Central Europe's Intellectual Heritage
## General Shortcuts
`Ctrl + Shift + X`: extension manager
`Ctrl + Shift + P`: command palette
`Ctrl + Shift + G`: source control view
## Repository Setup
1. fork this repository in GitLab to have a private copy
2. open VS Code
3. clone git repository via command palette: `Git: Clone`
- use `https` and not `ssh` with your username and password
- if asked, open cloned repository in a new window
4. download an image from the internet and add it to this readme
5. make a git commit via the source control view and push the changes to GitLab
## Python Setup
1. create virtual environment `.venv` via command palette: `Python: Create environment`
2. install project requirements
- open a new terminal via menu: (`Terminal > New Terminal`)
- verify you are using the virtual environment
- install dependencies via the Python package manager: `pip install -r requirements.txt`
- list the installed packages via the Python package manager: `pip freeze`
- close the terminal
3. open the first Jupyter Notebook and install package if needed
## Lesson 1 - Web Resources
This notebook gives an introduction to using data sources from the internet.
lxml==4.9.3
openpyxl==3.1.2
pandas==2.1.3
Pillow==10.1.0
requests==2.31.0
ultralytics==8.0.222
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment