Python & MySQL

Part 4. ๋ฐ์ดํ„ฐ์˜ ์ดํ•ด์™€ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค

๋ณธ ํฌ์ŠคํŒ…์€ ํŒจ์ŠคํŠธ์บ ํผ์Šค(FastCampus)์˜ ๋ฐ์ดํ„ฐ ์—”์ง€๋‹ˆ์–ด๋ง ์˜ฌ์ธ์› ํŒจํ‚ค์ง€ Online์„ ์ฐธ๊ณ ํ•˜์˜€์Šต๋‹ˆ๋‹ค.

1. Pymysql ํŒจํ‚ค์ง€

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import sys
import requests
import base64
import json
import logging
import pymysql #New library

client_id = '' #์ง์ ‘ ์ž…๋ ฅ
client_secret = '' # ์ง์ ‘ ์ž…๋ ฅ

host = '' #host
port = 3306
username = '' #user
database = '' #db
password = '' #passwd

def main():
    try:
        conn = pymysql.connect(host=host, user=username, passwd=password, db=database, port=port, use_unicode=True, charset='utf8')
        cursor = conn.cursor()
    except:
        logging.error('could not connect to RDS')
        sys.exit(1)

    cursor.execute('SHOW TABLES')
    print(cursor.fetchall())

    print('success')
    sys.exit(0)

if __name__ == '__main__':
    main()

2. INSERT, UPDATE, REPLACE, INSERT IGNORE

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
CREATE TABLE artists (id VARCHAR(255), name VARCHAR(255), followers INT, popularity INT, url VARCHAR(255), image_url VARCHAR(255), PRIMARY KEY(id)) ENGINE=InnoDB DEFAULT CHARSET='utf8';
CREATE TABLE artist_genres (artist_id VARCHAR(255), genre VARCHAR(255)) ENGINE=InnoDB DEFAULT CHARSET='utf8';
SHOW CREATE TABLE artists;

-- INSERT 
INSERT INTO artist_genres (artist_id, genre) VALUES ('1234', 'pop');
DELETE FROM artist_genres; --์ œ๊ฑฐ
DROP TABLE artist_genres; --์ œ๊ฑฐ
CREATE TABLE artist_genres (artist_id VARCHAR(255), genre VARCHAR(255), UNIQUE KEY(artist_id, genre)) ENGINE=InnoDB DEFAULT CHARSET='utf8'; --unique key ์ƒ์„ฑ
INSERT INTO artist_genres (artist_id, genre) VALUES ('1234', 'pop');
INSERT INTO artist_genres (artist_id, genre) VALUES ('1234', 'pop'); --๋‘ ๋ฒˆ ํ•˜๋ฉด ERROR

-- UPDATE
UPDATE artist_genres SET genre='pop' WHERE artist_id ='1234';
ALTER TABLE artist_genres ADD COLUMN country VARCHAR(255); 
ALTER TABLE artist_genres ADD COLUMN updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON 
UPDATE CURRENT_TIMESTAMP; --์—…๋ฐ์ดํŠธ๋˜๋Š” ์‹œ๊ฐ๋Œ€ ์ž๋™์ถ”๊ฐ€
INSERT INTO artist_genres (artist_id, genre, country) VALUES ('1234','pop','UK'); -- ์˜ค๋ฅ˜ ๋ฐœ์ƒํ•จ

-- REPLACE
REPLACE INTO artist_genres (artist_id, genre, country) VALUES ('1234','pop','UK');
/* ๋ฌธ์ œ์ (1) ์ง€์šฐ๊ณ  ์—…๋ฐ์ดํŠธํ•˜๊ธฐ ๋•Œ๋ฌธ์— 2๋ฒˆ์˜ ๊ณผ์ •์„ ๊ฑฐ์ณ์„œ ํผํฌ๋จผ์Šค์ ์œผ๋กœ ๋ฌธ์ œ ์ƒ๊ธธ ์ˆ˜๊ฐ€ ์žˆ๋‹ค.
๋ฌธ์ œ์ (2) primary key๊ฐ€ auto_increment์ธ ๊ฒฝ์šฐ ์ƒˆ๋กœ์šด ์ˆซ์ž๋กœ ๋ฐ”๋€Œ๊ฒŒ ๋œ๋‹ค. */

-- INSERT IGNORE
INSERT IGNORE INTO artist_genres (artist_id, genre, country) VALUES ('1234','rock','UK');
/* ๋ฌธ์ œ์ (1) ์ด๋ฏธ ๊ฐ’์ด ์žˆ์œผ๋ฉด ์ถ”๊ฐ€ํ•˜์ง€ ์•Š๊ฒŒ ๋œ๋‹ค.*/

-- INSERT ... ON DUPLICATE KEY UPDATE
INSERT INTO artist_genres (artist_id, genre, country) VALUES ('1234','rock','UK') ON DUPLICATE KEY UPDATE artist_id='1234', genre='rock', country='FR'; --UK๋ฅผ FR๋กœ ๋ฐ”๊พผ๋‹ค.

-- ETC
ALTER TABLE artist_genres DROP COLUMN country; --๋ถˆํ•„์š”ํ•œ ์นผ๋Ÿผ ์ง€์šฐ๊ธฐ

*MySQL์—์„œ ์ง„ํ–‰ํ•˜์˜€๋‹ค.

ํŠน์ด์‚ฌํ•ญ
  • data type์„ INTEGER๋กœ ํ•˜๋‹ˆ๊นŒ ์•ˆ ๋˜๊ณ , INT๋กœ ํ•˜๋‹ˆ๊นŒ ๋๋‹ค.

3. _, .format()

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
def main():
    try:
        conn = pymysql.connect(host=host, user=username, passwd=password, db=database, port=port, use_unicode=True, charset='utf8')
        cursor = conn.cursor()
    except:
        logging.error('could not connect to RDS')
        sys.exit(1)

    cursor.execute('SHOW TABLES')
    print(cursor.fetchall())

    query = "INSERT INTO artist_genres (artist_id, genre) VALUES ('{0}', '{1}')".format('2345','hip-hop')
    cursor.execute(query)
    conn.commit()
    sys.exit(0)

if __name__ == '__main__':
    main()

4. Dictionary์™€ JSON Package

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def main():
    try:
        conn = pymysql.connect(host=host, user=username, passwd=password, db=database, port=port, use_unicode=True, charset='utf8')
        cursor = conn.cursor()
    except:
        logging.error('could not connect to RDS')
        sys.exit(1)

    headers = get_headers(client_id, client_secret)

    ## Spotify Search api
    params = {
        'q': 'BTS',
        'type': 'artist',
        'limit': '5'
    }

    r = requests.get('https://api.spotify.com/v1/search', params=params, headers=headers)
    raw = json.loads(r.text)

    print(raw['artists'].keys)

def get_headers(client_id, client_secret):
    endpoint = 'https://accounts.spotify.com/api/token'
    encoded = base64.b64encode("{}:{}".format(client_id, client_secret).encode('utf-8')).decode('ascii')

    headers = {'Authorization': 'Basic {}'.format(encoded)}
    payload = {'grant_type': 'client_credentials'}

    r = requests.post(endpoint, data=payload, headers=headers)
    access_token = json.loads(r.text)['access_token']
    headers = {'Authorization': "Bearer {}".format(access_token)}

    return headers

if __name__ == '__main__':
    main()

5. Duplicate Record ํ•ธ๋“ค๋ง

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def main():
    try:
        conn = pymysql.connect(host=host, user=username, passwd=password, db=database, port=port, use_unicode=True, charset='utf8')
        cursor = conn.cursor()
    except:
        logging.error('could not connect to RDS')
        sys.exit(1)

    headers = get_headers(client_id, client_secret)

    ## Spotify Search api
    params = {
        'q': 'BTS',
        'type': 'artist',
        'limit': '1'
    }

    r = requests.get('https://api.spotify.com/v1/search', params=params, headers=headers)
    raw = json.loads(r.text)

    artist_raw = raw['artists']['items'][0]
    if artist_raw['name'] == params['q']:
        artist = {
            'id': artist_raw['id'],
            'name': artist_raw['name'],
            'followers': artist_raw['followers']['total'],
            'popularity': artist_raw['popularity'],
            'url': artist_raw['external_urls']['spotify'],
            'image_url': artist_raw['images'][0]['url']
        }

    query = """
        INSERT INTO artists (id, name, followers, popularity, url, image_url)
        VALUES ('{}', '{}', {}, {}, '{}', '{}')
        ON DUPLICATE KEY UPDATE id='{}', name='{}', followers={}, popularity={}, url='{}', image_url='{}'
    """.format(
        artist['id'],
        artist['name'],
        artist['followers'],
        artist['popularity'],
        artist['url'],
        artist['image_url'],
        artist['id'],
        artist['name'],
        artist['followers'],
        artist['popularity'],
        artist['url'],
        artist['image_url']
    )

    cursor.execute(query)
    conn.commit()

6. Duplicate Record ํ•ธ๋“ค๋ง์„ ์œ„ํ•œ ํŒŒ์ด์ฌ ํ•จ์ˆ˜

5์™€ ๋‹ค๋ฅธ ์ ์„ ๋ˆˆ์—ฌ๊ฒจ ๋ณด๊ธฐ (5๋ฅผ ๋ณด๋‹ค ๊ฐ„๋‹จํ•˜๊ฒŒ ํ•œ ์ฝ”๋“œ)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def main():
    ## .... ์—ฌ๊ธฐ๊นŒ์ง€๋Š” ์œ„์™€ ๋™์ผ

    r = requests.get('https://api.spotify.com/v1/search', params=params, headers=headers)
    raw = json.loads(r.text)

    artist = {}
    artist_raw = raw['artists']['items'][0]
    if artist_raw['name'] == params['q']:
        artist.update({
            'id': artist_raw['id'],
            'name': artist_raw['name'],
            'followers': artist_raw['followers']['total'],
            'popularity': artist_raw['popularity'],
            'url': artist_raw['external_urls']['spotify'],
            'image_url': artist_raw['images'][0]['url']
        })

    insert_row(cursor, data=artist, table='artists')
    conn.commit()

def insert_row(cursor, data, table):
    placeholders = ', '.join(['%s'] * len(data))
    columns = ', '.join(data.keys())
    key_placeholders = ', '.join(['{0}=%s'.format(k) for k in data.keys()])
    sql = 'INSERT INTO %s ( %s ) VALUES ( %s ) ON DUPLICATE KEY UPDATE %s' % (table, columns, placeholders, key_placeholders)
    cursor.execute(sql, list(data.values())*2)

7. Artist list ์ถ”์ถœํ•˜๊ธฐ

  • ํŒจ์ŠคํŠธ์บ ํผ์Šค ๊ฐ•์ขŒ๋ฅผ ํ†ตํ•ด ์ œ๊ณต๋œ artist_list.csv ํŒŒ์ผ์„ ํ™œ์šฉํ•˜์˜€๋‹ค.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def main():
    try:
        conn = pymysql.connect(host=host, user=username, passwd=password, db=database, port=port, use_unicode=True, charset='utf8')
        cursor = conn.cursor()
    except:
        logging.error('could not connect to RDS')
        sys.exit(1)

    headers = get_headers(client_id, client_secret)

    artists = []
    with open('../artist_list.csv', encoding="utf-8") as f:
        raw = csv.reader(f)
        for row in raw:
            artists.append(row[0])

    for a in artists:
        params = {
            'q': a,
            'type': 'artist',
            'limit': '1'
        }

        r = requests.get('https://api.spotify.com/v1/search', params=params, headers=headers)
        raw = json.loads(r.text)

        artist = {}
        try:
            if raw['artists']['items'][0]['name'] == params['q']:
                artist.update(
                    {
                        'id': raw['artists']['items'][0]['id'],
                        'name': raw['artists']['items'][0]['name'],
                        'followers': raw['artists']['items'][0]['followers']['total'],
                        'popularity': raw['artists']['items'][0]['popularity'],
                        'url': raw['artists']['items'][0]['external_urls']['spotify'],
                        'image_url': raw['artists']['items'][0]['images'][0]['url']
                    }
                )

                insert_row(cursor, artist, 'artists')
        except:
            logging.error('NO ITEMS FROM SEARCH API')
            continue

    conn.commit()
    # sys.exit(0)

ERROR:root:NO ITEMS FROM SEARCH API์™€ ๊ฐ™์€ ์—๋Ÿฌ๊ฐ€ ์—ฌ๋Ÿฌ ๊ฐœ๊ฐ€ ๋‚˜์˜ค๊ฒŒ ๋œ๋‹ค.
๋ง๊ทธ๋Œ€๋กœ SERACH API๋ฅผ ํ†ตํ•ด์„œ ITEMS๋ฅผ ์ฐพ์ง€ ๋ชปํ•˜๊ฒŒ ๋œ ๊ฒฝ์šฐ์— ํ•ด๋‹นํ•œ๋‹ค.

8. Batch ํ˜•์‹์œผ๋กœ ๋ฐ์ดํ„ฐ ์š”์ฒญ

  • ํ•œ๋ฒˆ์— ๋ฌถ์–ด์„œ API์— ์ „๋‹ฌํ•˜๋Š” ๋ฐฉ์‹์ด๋‹ค.
  • ๋ชจ๋“  API๊ฐ€ ์ œ๊ณตํ•˜๋Š” ๊ฒƒ์€ ์•„๋‹ˆ๊ธด ํ•˜๋‹ค!
  • Spotify๋Š” ‘Get Several Artists’ํ•˜๋Š” ๋ฒ•์„ ์ œ๊ณตํ•˜๊ณ  ์žˆ๋‹ค.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def main():
    try:
        conn = pymysql.connect(host=host, user=username, passwd=password, db=database, port=port, use_unicode=True, charset='utf8')
        cursor = conn.cursor()
    except:
        logging.error('could not connect to RDS')
        sys.exit(1)

    headers = get_headers(client_id, client_secret)

    cursor.execute("SELECT id FROM artists")
    artists = []
    for (id, ) in cursor.fetchall():
        artists.append(id)

    artist_batch = [artists[i: i+50] for i in range(0, len(artists), 50)]

    for i in artist_batch:
        ids = ','.join(i)
        URL = 'https://api.spotify.com/v1/artists/?ids={}'.format(ids)

        r= requests.get(URL, headers=headers)
        raw = json.loads(r.text)
        print(raw)
        print(len(raw['artists']))
        sys.exit(0)
1
2
-- ์ œ๋Œ€๋กœ ์ž˜ ๋“ค์–ด๊ฐ”๋Š”์ง€ ํ™•์ธํ•ด๋ณด๊ธฐ
select * from artist_genres limit 10;

9. MySQL ํ…Œ์ด๋ธ”๋“ค๋กœ ๋ฐ์ดํ„ฐ ์ €์žฅ

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def main():
    ## ... ์—ฌ๊ธฐ๊นŒ์ง€๋Š” ์œ„์™€ ๋˜‘๊ฐ™์Œ
    artist_batch = [artists[i: i+50] for i in range(0, len(artists), 50)]
    artist_genres = []

    for i in artist_batch:
        ids = ','.join(i)
        URL = 'https://api.spotify.com/v1/artists/?ids={}'.format(ids)

        r= requests.get(URL, headers=headers)
        raw = json.loads(r.text)

        for artist in raw['artists']:
            for genre in artist['genres']:
                artist_genres.append(
                    {
                        'artist_id': artist['id'],
                        'genre': genre
                    }
                )

    for data in artist_genres:
        insert_row(cursor, data, 'artist_genres')

    conn.commit()
    sys.exit(0)
    
    ## ... ์•„๋ž˜๋„ ๋‹ค ๋˜‘๊ฐ™์Œ