--- linux-ec2-2.6.31.orig/MAINTAINERS +++ linux-ec2-2.6.31/MAINTAINERS @@ -897,6 +897,12 @@ S: Maintained F: drivers/net/wireless/ath/ar9170/ +ATK0110 HWMON DRIVER +M: Luca Tettamanti +L: lm-sensors@lm-sensors.org +S: Maintained +F: drivers/hwmon/asus_atk0110.c + ATI_REMOTE2 DRIVER M: Ville Syrjala S: Maintained @@ -1986,7 +1992,7 @@ F: fs/* FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER -M: Riku Voipio +M: Riku Voipio L: lm-sensors@lm-sensors.org S: Maintained F: drivers/hwmon/f75375s.c --- linux-ec2-2.6.31.orig/Makefile +++ linux-ec2-2.6.31/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 31 -EXTRAVERSION = +EXTRAVERSION = .12 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* @@ -330,14 +330,23 @@ AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage +# Prefer linux-backports-modules +ifneq ($(KBUILD_SRC),) +ifneq ($(shell if test -e $(KBUILD_OUTPUT)/ubuntu-build; then echo yes; fi),yes) +UBUNTUINCLUDE := -I/usr/src/linux-headers-lbm-$(KERNELRELEASE) +endif +endif # Use LINUXINCLUDE when you must reference the include/ directory. # Needed to be compatible with the O= option -LINUXINCLUDE := -Iinclude \ +LINUXINCLUDE := $(UBUNTUINCLUDE) -Iinclude \ $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ -I$(srctree)/arch/$(hdr-arch)/include \ -include include/linux/autoconf.h +# UBUNTU: Include our third party driver stuff too +LINUXINCLUDE += -Iubuntu/include $(if $(KBUILD_SRC),-I$(srctree)/ubuntu/include) + KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ @@ -467,7 +476,7 @@ # Objects we will link into vmlinux / subdirs we need to visit init-y := init/ -drivers-y := drivers/ sound/ firmware/ +drivers-y := drivers/ sound/ firmware/ ubuntu/ net-y := net/ libs-y := lib/ core-y := usr/ @@ -980,11 +989,6 @@ # All the preparing.. prepare: prepare0 -# Leave this as default for preprocessing vmlinux.lds.S, which is now -# done in arch/$(ARCH)/kernel/Makefile - -export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) - # The asm symlink changes when $(ARCH) changes. 
# Detect this and ask user to run make mrproper # If asm is a stale symlink (point to dir that does not exist) remove it --- linux-ec2-2.6.31.orig/drivers/Makefile +++ linux-ec2-2.6.31/drivers/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_PNP) += pnp/ obj-$(CONFIG_ARM_AMBA) += amba/ -obj-$(CONFIG_XEN) += xen/ +obj-$(CONFIG_PARAVIRT_XEN) += xen/ # regulators early, since some subsystems rely on them to initialize obj-$(CONFIG_REGULATOR) += regulator/ @@ -39,6 +39,7 @@ obj-y += base/ block/ misc/ mfd/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ +obj-$(CONFIG_XEN) += xen/ obj-$(CONFIG_IDE) += ide/ obj-$(CONFIG_SCSI) += scsi/ obj-$(CONFIG_ATA) += ata/ --- linux-ec2-2.6.31.orig/drivers/cdrom/Makefile +++ linux-ec2-2.6.31/drivers/cdrom/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_BLK_DEV_SR) += cdrom.o obj-$(CONFIG_PARIDE_PCD) += cdrom.o obj-$(CONFIG_CDROM_PKTCDVD) += cdrom.o +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += cdrom.o obj-$(CONFIG_VIOCD) += viocd.o cdrom.o obj-$(CONFIG_GDROM) += gdrom.o cdrom.o --- linux-ec2-2.6.31.orig/drivers/s390/block/dasd_diag.c +++ linux-ec2-2.6.31/drivers/s390/block/dasd_diag.c @@ -145,6 +145,15 @@ mdsk_term_io(device); rc = mdsk_init_io(device, device->block->bp_block, 0, NULL); + if (rc == 4) { + if (!(device->features & DASD_FEATURE_READONLY)) { + dev_warn(&device->cdev->dev, + "The access mode of a DIAG device changed" + " to read-only"); + device->features |= DASD_FEATURE_READONLY; + } + rc = 0; + } if (rc) dev_warn(&device->cdev->dev, "DIAG ERP failed with " "rc=%d\n", rc); @@ -433,16 +442,20 @@ for (sb = 512; sb < bsize; sb = sb << 1) block->s2b_shift++; rc = mdsk_init_io(device, block->bp_block, 0, NULL); - if (rc) { + if (rc && (rc != 4)) { dev_warn(&device->cdev->dev, "DIAG initialization " "failed with rc=%d\n", rc); rc = -EIO; } else { + if (rc == 4) + device->features |= DASD_FEATURE_READONLY; dev_info(&device->cdev->dev, - "New DASD with %ld byte/block, total size %ld KB\n", + "New DASD with %ld byte/block, total size %ld KB%s\n", (unsigned long) block->bp_block, (unsigned long) (block->blocks << - block->s2b_shift) >> 1); + block->s2b_shift) >> 1, + (rc == 4) ? 
", read-only device" : ""); + rc = 0; } out_label: free_page((long) label); --- linux-ec2-2.6.31.orig/drivers/ide/ide-probe.c +++ linux-ec2-2.6.31/drivers/ide/ide-probe.c @@ -1035,15 +1035,6 @@ if (port_ops && port_ops->init_dev) port_ops->init_dev(drive); } - - ide_port_for_each_dev(i, drive, hwif) { - /* - * default to PIO Mode 0 before we figure out - * the most suited mode for the attached device - */ - if (port_ops && port_ops->set_pio_mode) - port_ops->set_pio_mode(drive, 0); - } } static void ide_init_port(ide_hwif_t *hwif, unsigned int port, --- linux-ec2-2.6.31.orig/drivers/ide/cmd64x.c +++ linux-ec2-2.6.31/drivers/ide/cmd64x.c @@ -379,7 +379,8 @@ .enablebits = {{0x00,0x00,0x00}, {0x51,0x08,0x08}}, .port_ops = &cmd64x_port_ops, .host_flags = IDE_HFLAG_CLEAR_SIMPLEX | - IDE_HFLAG_ABUSE_PREFETCH, + IDE_HFLAG_ABUSE_PREFETCH | + IDE_HFLAG_SERIALIZE, .pio_mask = ATA_PIO5, .mwdma_mask = ATA_MWDMA2, .udma_mask = 0x00, /* no udma */ @@ -389,7 +390,8 @@ .init_chipset = init_chipset_cmd64x, .enablebits = {{0x51,0x04,0x04}, {0x51,0x08,0x08}}, .port_ops = &cmd648_port_ops, - .host_flags = IDE_HFLAG_ABUSE_PREFETCH, + .host_flags = IDE_HFLAG_ABUSE_PREFETCH | + IDE_HFLAG_SERIALIZE, .pio_mask = ATA_PIO5, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA2, --- linux-ec2-2.6.31.orig/drivers/ide/slc90e66.c +++ linux-ec2-2.6.31/drivers/ide/slc90e66.c @@ -91,8 +91,7 @@ if (!(reg48 & u_flag)) pci_write_config_word(dev, 0x48, reg48|u_flag); - /* FIXME: (reg4a & a_speed) ? */ - if ((reg4a & u_speed) != u_speed) { + if ((reg4a & a_speed) != u_speed) { pci_write_config_word(dev, 0x4a, reg4a & ~a_speed); pci_read_config_word(dev, 0x4a, ®4a); pci_write_config_word(dev, 0x4a, reg4a|u_speed); --- linux-ec2-2.6.31.orig/drivers/ide/ide-lib.c +++ linux-ec2-2.6.31/drivers/ide/ide-lib.c @@ -18,6 +18,7 @@ { u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ +#ifndef CONFIG_XEN if (!PCI_DMA_BUS_IS_PHYS) { addr = BLK_BOUNCE_ANY; } else if (on && drive->media == ide_disk) { @@ -26,6 +27,16 @@ if (dev && dev->dma_mask) addr = *dev->dma_mask; } +#else + if (on && drive->media == ide_disk) { + struct device *dev = drive->hwif->dev; + + if (!PCI_DMA_BUS_IS_PHYS) + addr = BLK_BOUNCE_ANY; + else if (dev && dev->dma_mask) + addr = *dev->dma_mask; + } +#endif if (drive->queue) blk_queue_bounce_limit(drive->queue, addr); --- linux-ec2-2.6.31.orig/drivers/ide/sis5513.c +++ linux-ec2-2.6.31/drivers/ide/sis5513.c @@ -2,7 +2,7 @@ * Copyright (C) 1999-2000 Andre Hedrick * Copyright (C) 2002 Lionel Bouton , Maintainer * Copyright (C) 2003 Vojtech Pavlik - * Copyright (C) 2007 Bartlomiej Zolnierkiewicz + * Copyright (C) 2007-2009 Bartlomiej Zolnierkiewicz * * May be copied or modified under the terms of the GNU General Public License * @@ -281,11 +281,13 @@ pci_read_config_byte(dev, 0x4b, ®4bh); + rw_prefetch = reg4bh & ~(0x11 << drive->dn); + if (drive->media == ide_disk) - rw_prefetch = 0x11 << drive->dn; + rw_prefetch |= 0x11 << drive->dn; - if ((reg4bh & (0x11 << drive->dn)) != rw_prefetch) - pci_write_config_byte(dev, 0x4b, reg4bh|rw_prefetch); + if (reg4bh != rw_prefetch) + pci_write_config_byte(dev, 0x4b, rw_prefetch); } static void sis_set_pio_mode(ide_drive_t *drive, const u8 pio) --- linux-ec2-2.6.31.orig/drivers/ide/ide-ioctls.c +++ linux-ec2-2.6.31/drivers/ide/ide-ioctls.c @@ -162,7 +162,7 @@ if (tf->command == ATA_CMD_SET_FEATURES && tf->feature == SETFEATURES_XFER && tf->nsect >= XFER_SW_DMA_0) { - xfer_rate = ide_find_dma_mode(drive, XFER_UDMA_6); + xfer_rate = ide_find_dma_mode(drive, tf->nsect); if (xfer_rate != tf->nsect) 
{ err = -EINVAL; goto abort; --- linux-ec2-2.6.31.orig/drivers/i2c/i2c-core.c +++ linux-ec2-2.6.31/drivers/i2c/i2c-core.c @@ -718,6 +718,7 @@ { int res = 0; struct i2c_adapter *found; + struct i2c_client *client, *next; /* First make sure that this adapter was ever added */ mutex_lock(&core_lock); @@ -737,6 +738,16 @@ if (res) return res; + /* Remove devices instantiated from sysfs */ + list_for_each_entry_safe(client, next, &userspace_devices, detected) { + if (client->adapter == adap) { + dev_dbg(&adap->dev, "Removing %s at 0x%x\n", + client->name, client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + } + /* Detach any active clients. This can't fail, thus we do not checking the returned value. */ res = device_for_each_child(&adap->dev, NULL, __unregister_client); --- linux-ec2-2.6.31.orig/drivers/i2c/chips/tsl2550.c +++ linux-ec2-2.6.31/drivers/i2c/chips/tsl2550.c @@ -277,6 +277,7 @@ static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf) { + struct tsl2550_data *data = i2c_get_clientdata(client); u8 ch0, ch1; int ret; @@ -296,6 +297,8 @@ ret = tsl2550_calculate_lux(ch0, ch1); if (ret < 0) return ret; + if (data->operating_mode == 1) + ret *= 5; return sprintf(buf, "%d\n", ret); } --- linux-ec2-2.6.31.orig/drivers/i2c/busses/i2c-piix4.c +++ linux-ec2-2.6.31/drivers/i2c/busses/i2c-piix4.c @@ -168,7 +168,7 @@ } if (acpi_check_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) - return -EBUSY; + return -ENODEV; if (!request_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) { dev_err(&PIIX4_dev->dev, "SMBus region 0x%x already in use!\n", @@ -259,7 +259,7 @@ piix4_smba = ((smba_en_hi << 8) | smba_en_lo) & 0xffe0; if (acpi_check_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) - return -EBUSY; + return -ENODEV; if (!request_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) { dev_err(&PIIX4_dev->dev, "SMBus region 0x%x already in use!\n", --- linux-ec2-2.6.31.orig/drivers/i2c/busses/i2c-sis96x.c +++ linux-ec2-2.6.31/drivers/i2c/busses/i2c-sis96x.c @@ -280,7 +280,7 @@ retval = acpi_check_resource_conflict(&dev->resource[SIS96x_BAR]); if (retval) - return retval; + return -ENODEV; /* Everything is happy, let's grab the memory and set things up. 
*/ if (!request_region(sis96x_smbus_base, SMB_IOSIZE, --- linux-ec2-2.6.31.orig/drivers/i2c/busses/i2c-i801.c +++ linux-ec2-2.6.31/drivers/i2c/busses/i2c-i801.c @@ -732,8 +732,10 @@ } err = acpi_check_resource_conflict(&dev->resource[SMBBAR]); - if (err) + if (err) { + err = -ENODEV; goto exit; + } err = pci_request_region(dev, SMBBAR, i801_driver.name); if (err) { --- linux-ec2-2.6.31.orig/drivers/i2c/busses/i2c-isch.c +++ linux-ec2-2.6.31/drivers/i2c/busses/i2c-isch.c @@ -281,7 +281,7 @@ return -ENODEV; } if (acpi_check_region(sch_smba, SMBIOSIZE, sch_driver.name)) - return -EBUSY; + return -ENODEV; if (!request_region(sch_smba, SMBIOSIZE, sch_driver.name)) { dev_err(&dev->dev, "SMBus region 0x%x already in use!\n", sch_smba); --- linux-ec2-2.6.31.orig/drivers/i2c/busses/i2c-viapro.c +++ linux-ec2-2.6.31/drivers/i2c/busses/i2c-viapro.c @@ -365,7 +365,7 @@ found: error = acpi_check_region(vt596_smba, 8, vt596_driver.name); if (error) - return error; + return -ENODEV; if (!request_region(vt596_smba, 8, vt596_driver.name)) { dev_err(&pdev->dev, "SMBus region 0x%x already in use!\n", --- linux-ec2-2.6.31.orig/drivers/i2c/busses/i2c-amd756.c +++ linux-ec2-2.6.31/drivers/i2c/busses/i2c-amd756.c @@ -364,7 +364,7 @@ error = acpi_check_region(amd756_ioport, SMB_IOSIZE, amd756_driver.name); if (error) - return error; + return -ENODEV; if (!request_region(amd756_ioport, SMB_IOSIZE, amd756_driver.name)) { dev_err(&pdev->dev, "SMB region 0x%x already in use!\n", --- linux-ec2-2.6.31.orig/drivers/i2c/busses/i2c-amd8111.c +++ linux-ec2-2.6.31/drivers/i2c/busses/i2c-amd8111.c @@ -376,8 +376,10 @@ smbus->size = pci_resource_len(dev, 0); error = acpi_check_resource_conflict(&dev->resource[0]); - if (error) + if (error) { + error = -ENODEV; goto out_kfree; + } if (!request_region(smbus->base, smbus->size, amd8111_driver.name)) { error = -EBUSY; --- linux-ec2-2.6.31.orig/drivers/serial/suncore.h +++ linux-ec2-2.6.31/drivers/serial/suncore.h @@ -26,7 +26,8 @@ extern void sunserial_unregister_minors(struct uart_driver *, int); extern int sunserial_console_match(struct console *, struct device_node *, - struct uart_driver *, int); -extern void sunserial_console_termios(struct console *); + struct uart_driver *, int, bool); +extern void sunserial_console_termios(struct console *, + struct device_node *); #endif /* !(_SERIAL_SUN_H) */ --- linux-ec2-2.6.31.orig/drivers/serial/sunzilog.c +++ linux-ec2-2.6.31/drivers/serial/sunzilog.c @@ -1180,7 +1180,7 @@ (sunzilog_reg.minor - 64) + con->index, con->index); /* Get firmware console settings. */ - sunserial_console_termios(con); + sunserial_console_termios(con, to_of_device(up->port.dev)->node); /* Firmware console speed is limited to 150-->38400 baud so * this hackish cflag thing is OK. 
@@ -1416,7 +1416,8 @@ if (!keyboard_mouse) { if (sunserial_console_match(SUNZILOG_CONSOLE(), op->node, - &sunzilog_reg, up[0].port.line)) + &sunzilog_reg, up[0].port.line, + false)) up->flags |= SUNZILOG_FLAG_IS_CONS; err = uart_add_one_port(&sunzilog_reg, &up[0].port); if (err) { @@ -1425,7 +1426,8 @@ return err; } if (sunserial_console_match(SUNZILOG_CONSOLE(), op->node, - &sunzilog_reg, up[1].port.line)) + &sunzilog_reg, up[1].port.line, + false)) up->flags |= SUNZILOG_FLAG_IS_CONS; err = uart_add_one_port(&sunzilog_reg, &up[1].port); if (err) { --- linux-ec2-2.6.31.orig/drivers/serial/sunsab.c +++ linux-ec2-2.6.31/drivers/serial/sunsab.c @@ -883,7 +883,7 @@ printk("Console: ttyS%d (SAB82532)\n", (sunsab_reg.minor - 64) + con->index); - sunserial_console_termios(con); + sunserial_console_termios(con, to_of_device(up->port.dev)->node); switch (con->cflag & CBAUD) { case B150: baud = 150; break; @@ -1027,10 +1027,12 @@ goto out1; sunserial_console_match(SUNSAB_CONSOLE(), op->node, - &sunsab_reg, up[0].port.line); + &sunsab_reg, up[0].port.line, + false); sunserial_console_match(SUNSAB_CONSOLE(), op->node, - &sunsab_reg, up[1].port.line); + &sunsab_reg, up[1].port.line, + false); err = uart_add_one_port(&sunsab_reg, &up[0].port); if (err) @@ -1116,7 +1118,6 @@ if (!sunsab_ports) return -ENOMEM; - sunsab_reg.cons = SUNSAB_CONSOLE(); err = sunserial_register_minors(&sunsab_reg, num_channels); if (err) { kfree(sunsab_ports); --- linux-ec2-2.6.31.orig/drivers/serial/bfin_5xx.c +++ linux-ec2-2.6.31/drivers/serial/bfin_5xx.c @@ -42,6 +42,10 @@ # undef CONFIG_EARLY_PRINTK #endif +#ifdef CONFIG_SERIAL_BFIN_MODULE +# undef CONFIG_EARLY_PRINTK +#endif + /* UART name and device definitions */ #define BFIN_SERIAL_NAME "ttyBF" #define BFIN_SERIAL_MAJOR 204 --- linux-ec2-2.6.31.orig/drivers/serial/serial_cs.c +++ linux-ec2-2.6.31/drivers/serial/serial_cs.c @@ -884,6 +884,7 @@ PCMCIA_DEVICE_CIS_MANF_CARD(0x0192, 0xa555, "SW_555_SER.cis"), /* Sierra Aircard 555 CDMA 1xrtt Modem -- pre update */ PCMCIA_DEVICE_CIS_MANF_CARD(0x013f, 0xa555, "SW_555_SER.cis"), /* Sierra Aircard 555 CDMA 1xrtt Modem -- post update */ PCMCIA_DEVICE_CIS_PROD_ID12("MultiTech", "PCMCIA 56K DataFax", 0x842047ee, 0xc2efcf03, "MT5634ZLX.cis"), + PCMCIA_DEVICE_CIS_PROD_ID12("ADVANTECH", "COMpad-32/85B-2", 0x96913a85, 0x27ab5437, "COMpad2.cis"), PCMCIA_DEVICE_CIS_PROD_ID12("ADVANTECH", "COMpad-32/85B-4", 0x96913a85, 0xcec8f102, "COMpad4.cis"), PCMCIA_DEVICE_CIS_PROD_ID123("ADVANTECH", "COMpad-32/85", "1.0", 0x96913a85, 0x8fbe92ae, 0x0877b627, "COMpad2.cis"), PCMCIA_DEVICE_CIS_PROD_ID2("RS-COM 2P", 0xad20b156, "RS-COM-2P.cis"), --- linux-ec2-2.6.31.orig/drivers/serial/sunsu.c +++ linux-ec2-2.6.31/drivers/serial/sunsu.c @@ -1329,11 +1329,9 @@ */ static int __init sunsu_console_setup(struct console *co, char *options) { + static struct ktermios dummy; + struct ktermios termios; struct uart_port *port; - int baud = 9600; - int bits = 8; - int parity = 'n'; - int flow = 'n'; printk("Console: ttyS%d (SU)\n", (sunsu_reg.minor - 64) + co->index); @@ -1352,10 +1350,15 @@ */ spin_lock_init(&port->lock); - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); + /* Get firmware console settings. 
*/ + sunserial_console_termios(co, to_of_device(port->dev)->node); - return uart_set_options(port, co, baud, parity, bits, flow); + memset(&termios, 0, sizeof(struct ktermios)); + termios.c_cflag = co->cflag; + port->mctrl |= TIOCM_DTR; + port->ops->set_termios(port, &termios, &dummy); + + return 0; } static struct console sunsu_console = { @@ -1409,6 +1412,7 @@ struct uart_sunsu_port *up; struct resource *rp; enum su_type type; + bool ignore_line; int err; type = su_get_type(dp); @@ -1467,8 +1471,14 @@ up->port.ops = &sunsu_pops; + ignore_line = false; + if (!strcmp(dp->name, "rsc-console") || + !strcmp(dp->name, "lom-console")) + ignore_line = true; + sunserial_console_match(SUNSU_CONSOLE(), dp, - &sunsu_reg, up->port.line); + &sunsu_reg, up->port.line, + ignore_line); err = uart_add_one_port(&sunsu_reg, &up->port); if (err) goto out_unmap; @@ -1517,6 +1527,10 @@ .name = "serial", .compatible = "su", }, + { + .type = "serial", + .compatible = "su", + }, {}, }; MODULE_DEVICE_TABLE(of, su_match); @@ -1548,6 +1562,12 @@ num_uart++; } } + for_each_node_by_type(dp, "serial") { + if (of_device_is_compatible(dp, "su")) { + if (su_get_type(dp) == SU_PORT_PORT) + num_uart++; + } + } if (num_uart) { err = sunserial_register_minors(&sunsu_reg, num_uart); --- linux-ec2-2.6.31.orig/drivers/serial/Kconfig +++ linux-ec2-2.6.31/drivers/serial/Kconfig @@ -9,6 +9,7 @@ # The new 8250/16550 serial drivers config SERIAL_8250 tristate "8250/16550 and compatible serial support" + depends on !XEN_DISABLE_SERIAL select SERIAL_CORE ---help--- This selects whether you want to include the driver for the standard --- linux-ec2-2.6.31.orig/drivers/serial/suncore.c +++ linux-ec2-2.6.31/drivers/serial/suncore.c @@ -53,20 +53,21 @@ EXPORT_SYMBOL(sunserial_unregister_minors); int sunserial_console_match(struct console *con, struct device_node *dp, - struct uart_driver *drv, int line) + struct uart_driver *drv, int line, bool ignore_line) { - int off; - if (!con || of_console_device != dp) return 0; - off = 0; - if (of_console_options && - *of_console_options == 'b') - off = 1; + if (!ignore_line) { + int off = 0; - if ((line & 1) != off) - return 0; + if (of_console_options && + *of_console_options == 'b') + off = 1; + + if ((line & 1) != off) + return 0; + } con->index = line; drv->cons = con; @@ -76,23 +77,24 @@ } EXPORT_SYMBOL(sunserial_console_match); -void -sunserial_console_termios(struct console *con) +void sunserial_console_termios(struct console *con, struct device_node *uart_dp) { - struct device_node *dp; - const char *od, *mode, *s; + const char *mode, *s; char mode_prop[] = "ttyX-mode"; int baud, bits, stop, cflag; char parity; - dp = of_find_node_by_path("/options"); - od = of_get_property(dp, "output-device", NULL); - if (!strcmp(od, "rsc")) { - mode = of_get_property(of_console_device, + if (!strcmp(uart_dp->name, "rsc") || + !strcmp(uart_dp->name, "rsc-console") || + !strcmp(uart_dp->name, "rsc-control")) { + mode = of_get_property(uart_dp, "ssp-console-modes", NULL); if (!mode) mode = "115200,8,n,1,-"; + } else if (!strcmp(uart_dp->name, "lom-console")) { + mode = "9600,8,n,1,-"; } else { + struct device_node *dp; char c; c = 'a'; @@ -101,6 +103,7 @@ mode_prop[3] = c; + dp = of_find_node_by_path("/options"); mode = of_get_property(dp, mode_prop, NULL); if (!mode) mode = "9600,8,n,1,-"; --- linux-ec2-2.6.31.orig/drivers/serial/8250_pci.c +++ linux-ec2-2.6.31/drivers/serial/8250_pci.c @@ -1561,6 +1561,7 @@ pbn_exar_XR17C152, pbn_exar_XR17C154, pbn_exar_XR17C158, + pbn_exar_ibm_saturn, pbn_pasemi_1682M, 
pbn_ni8430_2, pbn_ni8430_4, @@ -2146,6 +2147,13 @@ .base_baud = 921600, .uart_offset = 0x200, }, + [pbn_exar_ibm_saturn] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 0x200, + }, + /* * PA Semi PWRficient PA6T-1682M on-chip UART */ @@ -2649,6 +2657,9 @@ PCI_SUBVENDOR_ID_CONNECT_TECH, PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_485, 0, 0, pbn_b0_8_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C152, + PCI_VENDOR_ID_IBM, PCI_SUBDEVICE_ID_IBM_SATURN_SERIAL_ONE_PORT, + 0, 0, pbn_exar_ibm_saturn }, { PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_U530, PCI_ANY_ID, PCI_ANY_ID, 0, 0, --- linux-ec2-2.6.31.orig/drivers/serial/sunhv.c +++ linux-ec2-2.6.31/drivers/serial/sunhv.c @@ -566,7 +566,7 @@ goto out_free_con_read_page; sunserial_console_match(&sunhv_console, op->node, - &sunhv_reg, port->line); + &sunhv_reg, port->line, false); err = uart_add_one_port(&sunhv_reg, port); if (err) --- linux-ec2-2.6.31.orig/drivers/serial/of_serial.c +++ linux-ec2-2.6.31/drivers/serial/of_serial.c @@ -161,6 +161,7 @@ static struct of_device_id __devinitdata of_platform_serial_table[] = { { .type = "serial", .compatible = "ns8250", .data = (void *)PORT_8250, }, { .type = "serial", .compatible = "ns16450", .data = (void *)PORT_16450, }, + { .type = "serial", .compatible = "ns16550a", .data = (void *)PORT_16550A, }, { .type = "serial", .compatible = "ns16550", .data = (void *)PORT_16550, }, { .type = "serial", .compatible = "ns16750", .data = (void *)PORT_16750, }, { .type = "serial", .compatible = "ns16850", .data = (void *)PORT_16850, }, --- linux-ec2-2.6.31.orig/drivers/serial/8250.c +++ linux-ec2-2.6.31/drivers/serial/8250.c @@ -1337,14 +1337,12 @@ serial_out(up, UART_IER, up->ier); if (up->bugs & UART_BUG_TXEN) { - unsigned char lsr, iir; + unsigned char lsr; lsr = serial_in(up, UART_LSR); up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - iir = serial_in(up, UART_IIR) & 0x0f; if ((up->port.type == PORT_RM9000) ? - (lsr & UART_LSR_THRE && - (iir == UART_IIR_NO_INT || iir == UART_IIR_THRI)) : - (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT)) + (lsr & UART_LSR_THRE) : + (lsr & UART_LSR_TEMT)) transmit_chars(up); } } --- linux-ec2-2.6.31.orig/drivers/block/nbd.c +++ linux-ec2-2.6.31/drivers/block/nbd.c @@ -56,7 +56,7 @@ static unsigned int nbds_max = 16; static struct nbd_device *nbd_dev; -static int max_part; +static int max_part = 15; /* * Use just one lock (or at most 1 per NIC). 
Two arguments for this: --- linux-ec2-2.6.31.orig/drivers/block/Kconfig +++ linux-ec2-2.6.31/drivers/block/Kconfig @@ -458,9 +458,9 @@ help Include support for the Xilinx SystemACE CompactFlash interface -config XEN_BLKDEV_FRONTEND +config XEN_BLKFRONT tristate "Xen virtual block device support" - depends on XEN + depends on PARAVIRT_XEN default y help This driver implements the front-end of the Xen virtual --- linux-ec2-2.6.31.orig/drivers/block/xen-blkfront.c +++ linux-ec2-2.6.31/drivers/block/xen-blkfront.c @@ -1056,7 +1056,6 @@ static struct xenbus_driver blkfront = { .name = "vbd", - .owner = THIS_MODULE, .ids = blkfront_ids, .probe = blkfront_probe, .remove = blkfront_remove, --- linux-ec2-2.6.31.orig/drivers/block/Makefile +++ linux-ec2-2.6.31/drivers/block/Makefile @@ -35,6 +35,6 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o -obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_XEN_BLKFRONT) += xen-blkfront.o swim_mod-objs := swim.o swim_asm.o --- linux-ec2-2.6.31.orig/drivers/cpufreq/Kconfig +++ linux-ec2-2.6.31/drivers/cpufreq/Kconfig @@ -1,5 +1,6 @@ config CPU_FREQ bool "CPU Frequency scaling" + depends on !PROCESSOR_EXTERNAL_CONTROL help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because --- linux-ec2-2.6.31.orig/drivers/isdn/gigaset/interface.c +++ linux-ec2-2.6.31/drivers/isdn/gigaset/interface.c @@ -408,33 +408,28 @@ return retval; } -/* FIXME: This function does not have error returns */ - static int if_chars_in_buffer(struct tty_struct *tty) { struct cardstate *cs; - int retval = -ENODEV; + int retval = 0; cs = (struct cardstate *) tty->driver_data; if (!cs) { pr_err("%s: no cardstate\n", __func__); - return -ENODEV; + return 0; } gig_dbg(DEBUG_IF, "%u: %s()", cs->minor_index, __func__); - if (mutex_lock_interruptible(&cs->mutex)) - return -ERESTARTSYS; // FIXME -EINTR? 
+ mutex_lock(&cs->mutex); - if (!cs->connected) { + if (!cs->connected) gig_dbg(DEBUG_IF, "not connected"); - retval = -ENODEV; - } else if (!cs->open_count) + else if (!cs->open_count) dev_warn(cs->dev, "%s: device not opened\n", __func__); - else if (cs->mstate != MS_LOCKED) { + else if (cs->mstate != MS_LOCKED) dev_warn(cs->dev, "can't write to unlocked device\n"); - retval = -EBUSY; - } else + else retval = cs->ops->chars_in_buffer(cs); mutex_unlock(&cs->mutex); --- linux-ec2-2.6.31.orig/drivers/isdn/hisax/hfc_usb.c +++ linux-ec2-2.6.31/drivers/isdn/hisax/hfc_usb.c @@ -817,8 +817,8 @@ } /* we have a complete hdlc packet */ if (finish) { - if ((!fifo->skbuff->data[fifo->skbuff->len - 1]) - && (fifo->skbuff->len > 3)) { + if (fifo->skbuff->len > 3 && + !fifo->skbuff->data[fifo->skbuff->len - 1]) { if (fifon == HFCUSB_D_RX) { DBG(HFCUSB_DBG_DCHANNEL, --- linux-ec2-2.6.31.orig/drivers/isdn/i4l/isdn_ppp.c +++ linux-ec2-2.6.31/drivers/isdn/i4l/isdn_ppp.c @@ -1535,10 +1535,8 @@ int sz = ISDN_MAX_CHANNELS*sizeof(ippp_bundle); if( (isdn_ppp_bundle_arr = kzalloc(sz, GFP_KERNEL)) == NULL ) return -ENOMEM; - for (i = 0; i < ISDN_MAX_CHANNELS; i++) { + for( i = 0; i < ISDN_MAX_CHANNELS; i++ ) spin_lock_init(&isdn_ppp_bundle_arr[i].lock); - skb_queue_head_init(&isdn_ppp_bundle_arr[i].frags); - } return 0; } @@ -1571,7 +1569,7 @@ if ((lp->netdev->pb = isdn_ppp_mp_bundle_alloc()) == NULL) return -ENOMEM; lp->next = lp->last = lp; /* nobody else in a queue */ - skb_queue_head_init(&lp->netdev->pb->frags); + lp->netdev->pb->frags = NULL; lp->netdev->pb->frames = 0; lp->netdev->pb->seq = UINT_MAX; } @@ -1583,29 +1581,28 @@ static u32 isdn_ppp_mp_get_seq( int short_seq, struct sk_buff * skb, u32 last_seq ); -static void isdn_ppp_mp_discard(ippp_bundle *mp, struct sk_buff *from, - struct sk_buff *to); -static void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *from, struct sk_buff *to, - u32 lastseq); -static void isdn_ppp_mp_free_skb(ippp_bundle *mp, struct sk_buff *skb); +static struct sk_buff * isdn_ppp_mp_discard( ippp_bundle * mp, + struct sk_buff * from, struct sk_buff * to ); +static void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp, + struct sk_buff * from, struct sk_buff * to ); +static void isdn_ppp_mp_free_skb( ippp_bundle * mp, struct sk_buff * skb ); static void isdn_ppp_mp_print_recv_pkt( int slot, struct sk_buff * skb ); static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp, - struct sk_buff *skb) + struct sk_buff *skb) { - struct sk_buff *newfrag, *frag, *start, *nextf; - u32 newseq, minseq, thisseq; - isdn_mppp_stats *stats; struct ippp_struct *is; + isdn_net_local * lpq; + ippp_bundle * mp; + isdn_mppp_stats * stats; + struct sk_buff * newfrag, * frag, * start, *nextf; + u32 newseq, minseq, thisseq; unsigned long flags; - isdn_net_local *lpq; - ippp_bundle *mp; int slot; spin_lock_irqsave(&net_dev->pb->lock, flags); - mp = net_dev->pb; - stats = &mp->stats; + mp = net_dev->pb; + stats = &mp->stats; slot = lp->ppp_slot; if (slot < 0 || slot >= ISDN_MAX_CHANNELS) { printk(KERN_ERR "%s: lp->ppp_slot(%d)\n", @@ -1616,19 +1613,20 @@ return; } is = ippp_table[slot]; - if (++mp->frames > stats->max_queue_len) + if( ++mp->frames > stats->max_queue_len ) stats->max_queue_len = mp->frames; if (is->debug & 0x8) isdn_ppp_mp_print_recv_pkt(lp->ppp_slot, skb); newseq = isdn_ppp_mp_get_seq(is->mpppcfg & SC_IN_SHORT_SEQ, - skb, is->last_link_seqno); + skb, is->last_link_seqno); + /* if this packet seq # is less than last 
already processed one, * toss it right away, but check for sequence start case first */ - if (mp->seq > MP_LONGSEQ_MAX && (newseq & MP_LONGSEQ_MAXBIT)) { + if( mp->seq > MP_LONGSEQ_MAX && (newseq & MP_LONGSEQ_MAXBIT) ) { mp->seq = newseq; /* the first packet: required for * rfc1990 non-compliant clients -- * prevents constant packet toss */ @@ -1659,31 +1657,22 @@ * packets */ newfrag = skb; - /* Insert new fragment into the proper sequence slot. */ - skb_queue_walk(&mp->frags, frag) { - if (MP_SEQ(frag) == newseq) { - isdn_ppp_mp_free_skb(mp, newfrag); - newfrag = NULL; - break; - } - if (MP_LT(newseq, MP_SEQ(frag))) { - __skb_queue_before(&mp->frags, frag, newfrag); - newfrag = NULL; - break; - } - } - if (newfrag) - __skb_queue_tail(&mp->frags, newfrag); + /* if this new fragment is before the first one, then enqueue it now. */ + if ((frag = mp->frags) == NULL || MP_LT(newseq, MP_SEQ(frag))) { + newfrag->next = frag; + mp->frags = frag = newfrag; + newfrag = NULL; + } - frag = skb_peek(&mp->frags); - start = ((MP_FLAGS(frag) & MP_BEGIN_FRAG) && - (MP_SEQ(frag) == mp->seq)) ? frag : NULL; - if (!start) - goto check_overflow; + start = MP_FLAGS(frag) & MP_BEGIN_FRAG && + MP_SEQ(frag) == mp->seq ? frag : NULL; - /* main fragment traversing loop + /* + * main fragment traversing loop * * try to accomplish several tasks: + * - insert new fragment into the proper sequence slot (once that's done + * newfrag will be set to NULL) * - reassemble any complete fragment sequence (non-null 'start' * indicates there is a continguous sequence present) * - discard any incomplete sequences that are below minseq -- due @@ -1692,46 +1681,71 @@ * come to complete such sequence and it should be discarded * * loop completes when we accomplished the following tasks: + * - new fragment is inserted in the proper sequence ('newfrag' is + * set to NULL) * - we hit a gap in the sequence, so no reassembly/processing is * possible ('start' would be set to NULL) * * algorithm for this code is derived from code in the book * 'PPP Design And Debugging' by James Carlson (Addison-Wesley) */ - skb_queue_walk_safe(&mp->frags, frag, nextf) { - thisseq = MP_SEQ(frag); + while (start != NULL || newfrag != NULL) { + + thisseq = MP_SEQ(frag); + nextf = frag->next; - /* check for misplaced start */ - if (start != frag && (MP_FLAGS(frag) & MP_BEGIN_FRAG)) { - printk(KERN_WARNING"isdn_mppp(seq %d): new " - "BEGIN flag with no prior END", thisseq); - stats->seqerrs++; - stats->frame_drops++; - isdn_ppp_mp_discard(mp, start, frag); - start = frag; - } else if (MP_LE(thisseq, minseq)) { - if (MP_FLAGS(frag) & MP_BEGIN_FRAG) + /* drop any duplicate fragments */ + if (newfrag != NULL && thisseq == newseq) { + isdn_ppp_mp_free_skb(mp, newfrag); + newfrag = NULL; + } + + /* insert new fragment before next element if possible. 
*/ + if (newfrag != NULL && (nextf == NULL || + MP_LT(newseq, MP_SEQ(nextf)))) { + newfrag->next = nextf; + frag->next = nextf = newfrag; + newfrag = NULL; + } + + if (start != NULL) { + /* check for misplaced start */ + if (start != frag && (MP_FLAGS(frag) & MP_BEGIN_FRAG)) { + printk(KERN_WARNING"isdn_mppp(seq %d): new " + "BEGIN flag with no prior END", thisseq); + stats->seqerrs++; + stats->frame_drops++; + start = isdn_ppp_mp_discard(mp, start,frag); + nextf = frag->next; + } + } else if (MP_LE(thisseq, minseq)) { + if (MP_FLAGS(frag) & MP_BEGIN_FRAG) start = frag; - else { + else { if (MP_FLAGS(frag) & MP_END_FRAG) - stats->frame_drops++; - __skb_unlink(skb, &mp->frags); + stats->frame_drops++; + if( mp->frags == frag ) + mp->frags = nextf; isdn_ppp_mp_free_skb(mp, frag); + frag = nextf; continue; - } + } } - /* if we have end fragment, then we have full reassembly - * sequence -- reassemble and process packet now + /* if start is non-null and we have end fragment, then + * we have full reassembly sequence -- reassemble + * and process packet now */ - if (MP_FLAGS(frag) & MP_END_FRAG) { - minseq = mp->seq = (thisseq+1) & MP_LONGSEQ_MASK; - /* Reassemble the packet then dispatch it */ - isdn_ppp_mp_reassembly(net_dev, lp, start, frag, thisseq); + if (start != NULL && (MP_FLAGS(frag) & MP_END_FRAG)) { + minseq = mp->seq = (thisseq+1) & MP_LONGSEQ_MASK; + /* Reassemble the packet then dispatch it */ + isdn_ppp_mp_reassembly(net_dev, lp, start, nextf); - start = NULL; - frag = NULL; - } + start = NULL; + frag = NULL; + + mp->frags = nextf; + } /* check if need to update start pointer: if we just * reassembled the packet and sequence is contiguous @@ -1742,25 +1756,26 @@ * below low watermark and set start to the next frag or * clear start ptr. */ - if (nextf != (struct sk_buff *)&mp->frags && + if (nextf != NULL && ((thisseq+1) & MP_LONGSEQ_MASK) == MP_SEQ(nextf)) { - /* if we just reassembled and the next one is here, - * then start another reassembly. - */ - if (frag == NULL) { + /* if we just reassembled and the next one is here, + * then start another reassembly. 
*/ + + if (frag == NULL) { if (MP_FLAGS(nextf) & MP_BEGIN_FRAG) - start = nextf; - else { - printk(KERN_WARNING"isdn_mppp(seq %d):" - " END flag with no following " - "BEGIN", thisseq); + start = nextf; + else + { + printk(KERN_WARNING"isdn_mppp(seq %d):" + " END flag with no following " + "BEGIN", thisseq); stats->seqerrs++; } } - } else { - if (nextf != (struct sk_buff *)&mp->frags && - frag != NULL && - MP_LT(thisseq, minseq)) { + + } else { + if ( nextf != NULL && frag != NULL && + MP_LT(thisseq, minseq)) { /* we've got a break in the sequence * and we not at the end yet * and we did not just reassembled @@ -1769,39 +1784,41 @@ * discard all the frames below low watermark * and start over */ stats->frame_drops++; - isdn_ppp_mp_discard(mp, start, nextf); + mp->frags = isdn_ppp_mp_discard(mp,start,nextf); } /* break in the sequence, no reassembly */ - start = NULL; - } - if (!start) - break; - } + start = NULL; + } + + frag = nextf; + } /* while -- main loop */ + + if (mp->frags == NULL) + mp->frags = frag; -check_overflow: /* rather straighforward way to deal with (not very) possible - * queue overflow - */ + * queue overflow */ if (mp->frames > MP_MAX_QUEUE_LEN) { stats->overflows++; - skb_queue_walk_safe(&mp->frags, frag, nextf) { - if (mp->frames <= MP_MAX_QUEUE_LEN) - break; - __skb_unlink(frag, &mp->frags); - isdn_ppp_mp_free_skb(mp, frag); + while (mp->frames > MP_MAX_QUEUE_LEN) { + frag = mp->frags->next; + isdn_ppp_mp_free_skb(mp, mp->frags); + mp->frags = frag; } } spin_unlock_irqrestore(&mp->lock, flags); } -static void isdn_ppp_mp_cleanup(isdn_net_local *lp) +static void isdn_ppp_mp_cleanup( isdn_net_local * lp ) { - struct sk_buff *skb, *tmp; - - skb_queue_walk_safe(&lp->netdev->pb->frags, skb, tmp) { - __skb_unlink(skb, &lp->netdev->pb->frags); - isdn_ppp_mp_free_skb(lp->netdev->pb, skb); + struct sk_buff * frag = lp->netdev->pb->frags; + struct sk_buff * nextfrag; + while( frag ) { + nextfrag = frag->next; + isdn_ppp_mp_free_skb(lp->netdev->pb, frag); + frag = nextfrag; } + lp->netdev->pb->frags = NULL; } static u32 isdn_ppp_mp_get_seq( int short_seq, @@ -1838,115 +1855,72 @@ return seq; } -static void isdn_ppp_mp_discard(ippp_bundle *mp, struct sk_buff *from, - struct sk_buff *to) +struct sk_buff * isdn_ppp_mp_discard( ippp_bundle * mp, + struct sk_buff * from, struct sk_buff * to ) { - if (from) { - struct sk_buff *skb, *tmp; - int freeing = 0; - - skb_queue_walk_safe(&mp->frags, skb, tmp) { - if (skb == to) - break; - if (skb == from) - freeing = 1; - if (!freeing) - continue; - __skb_unlink(skb, &mp->frags); - isdn_ppp_mp_free_skb(mp, skb); + if( from ) + while (from != to) { + struct sk_buff * next = from->next; + isdn_ppp_mp_free_skb(mp, from); + from = next; } - } -} - -static unsigned int calc_tot_len(struct sk_buff_head *queue, - struct sk_buff *from, struct sk_buff *to) -{ - unsigned int tot_len = 0; - struct sk_buff *skb; - int found_start = 0; - - skb_queue_walk(queue, skb) { - if (skb == from) - found_start = 1; - if (!found_start) - continue; - tot_len += skb->len - MP_HEADER_LEN; - if (skb == to) - break; - } - return tot_len; + return from; } -/* Reassemble packet using fragments in the reassembly queue from - * 'from' until 'to', inclusive. 
- */ -static void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *from, struct sk_buff *to, - u32 lastseq) +void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp, + struct sk_buff * from, struct sk_buff * to ) { - ippp_bundle *mp = net_dev->pb; - unsigned int tot_len; - struct sk_buff *skb; + ippp_bundle * mp = net_dev->pb; int proto; + struct sk_buff * skb; + unsigned int tot_len; if (lp->ppp_slot < 0 || lp->ppp_slot >= ISDN_MAX_CHANNELS) { printk(KERN_ERR "%s: lp->ppp_slot(%d) out of range\n", __func__, lp->ppp_slot); return; } - - tot_len = calc_tot_len(&mp->frags, from, to); - - if (MP_FLAGS(from) == (MP_BEGIN_FRAG | MP_END_FRAG)) { - if (ippp_table[lp->ppp_slot]->debug & 0x40) + if( MP_FLAGS(from) == (MP_BEGIN_FRAG | MP_END_FRAG) ) { + if( ippp_table[lp->ppp_slot]->debug & 0x40 ) printk(KERN_DEBUG "isdn_mppp: reassembly: frame %d, " - "len %d\n", MP_SEQ(from), from->len); + "len %d\n", MP_SEQ(from), from->len ); skb = from; skb_pull(skb, MP_HEADER_LEN); - __skb_unlink(skb, &mp->frags); mp->frames--; } else { - struct sk_buff *walk, *tmp; - int found_start = 0; + struct sk_buff * frag; + int n; - if (ippp_table[lp->ppp_slot]->debug & 0x40) - printk(KERN_DEBUG"isdn_mppp: reassembling frames %d " - "to %d, len %d\n", MP_SEQ(from), lastseq, - tot_len); + for(tot_len=n=0, frag=from; frag != to; frag=frag->next, n++) + tot_len += frag->len - MP_HEADER_LEN; - skb = dev_alloc_skb(tot_len); - if (!skb) + if( ippp_table[lp->ppp_slot]->debug & 0x40 ) + printk(KERN_DEBUG"isdn_mppp: reassembling frames %d " + "to %d, len %d\n", MP_SEQ(from), + (MP_SEQ(from)+n-1) & MP_LONGSEQ_MASK, tot_len ); + if( (skb = dev_alloc_skb(tot_len)) == NULL ) { printk(KERN_ERR "isdn_mppp: cannot allocate sk buff " - "of size %d\n", tot_len); - - found_start = 0; - skb_queue_walk_safe(&mp->frags, walk, tmp) { - if (walk == from) - found_start = 1; - if (!found_start) - continue; + "of size %d\n", tot_len); + isdn_ppp_mp_discard(mp, from, to); + return; + } - if (skb) { - unsigned int len = walk->len - MP_HEADER_LEN; - skb_copy_from_linear_data_offset(walk, MP_HEADER_LEN, - skb_put(skb, len), - len); - } - __skb_unlink(walk, &mp->frags); - isdn_ppp_mp_free_skb(mp, walk); + while( from != to ) { + unsigned int len = from->len - MP_HEADER_LEN; - if (walk == to) - break; + skb_copy_from_linear_data_offset(from, MP_HEADER_LEN, + skb_put(skb,len), + len); + frag = from->next; + isdn_ppp_mp_free_skb(mp, from); + from = frag; } } - if (!skb) - return; - proto = isdn_ppp_strip_proto(skb); isdn_ppp_push_higher(net_dev, lp, skb, proto); } -static void isdn_ppp_mp_free_skb(ippp_bundle *mp, struct sk_buff *skb) +static void isdn_ppp_mp_free_skb(ippp_bundle * mp, struct sk_buff * skb) { dev_kfree_skb(skb); mp->frames--; --- linux-ec2-2.6.31.orig/drivers/ata/sata_nv.c +++ linux-ec2-2.6.31/drivers/ata/sata_nv.c @@ -1594,9 +1594,21 @@ !ata_dev_enabled(link->device)) sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, NULL, NULL); - else if (!(ehc->i.flags & ATA_EHI_QUIET)) - ata_link_printk(link, KERN_INFO, - "nv: skipping hardreset on occupied port\n"); + else { + const unsigned long *timing = sata_ehc_deb_timing(ehc); + int rc; + + if (!(ehc->i.flags & ATA_EHI_QUIET)) + ata_link_printk(link, KERN_INFO, "nv: skipping " + "hardreset on occupied port\n"); + + /* make sure the link is online */ + rc = sata_link_resume(link, timing, deadline); + /* whine about phy resume failure but proceed */ + if (rc && rc != -EOPNOTSUPP) + ata_link_printk(link, KERN_WARNING, "failed to 
resume " + "link (errno=%d)\n", rc); + } /* device signature acquisition is unreliable */ return -EAGAIN; --- linux-ec2-2.6.31.orig/drivers/ata/sata_via.c +++ linux-ec2-2.6.31/drivers/ata/sata_via.c @@ -93,7 +93,6 @@ { PCI_VDEVICE(VIA, 0x7372), vt6420 }, { PCI_VDEVICE(VIA, 0x5287), vt8251 }, /* 2 sata chnls (Master/Slave) */ { PCI_VDEVICE(VIA, 0x9000), vt8251 }, - { PCI_VDEVICE(VIA, 0x9040), vt8251 }, { } /* terminate list */ }; --- linux-ec2-2.6.31.orig/drivers/ata/pata_hpt37x.c +++ linux-ec2-2.6.31/drivers/ata/pata_hpt37x.c @@ -24,7 +24,7 @@ #include #define DRV_NAME "pata_hpt37x" -#define DRV_VERSION "0.6.12" +#define DRV_VERSION "0.6.14" struct hpt_clock { u8 xfer_speed; @@ -404,9 +404,8 @@ pci_read_config_dword(pdev, addr1, ®); mode = hpt37x_find_mode(ap, adev->pio_mode); - mode &= ~0x8000000; /* No FIFO in PIO */ - mode &= ~0x30070000; /* Leave config bits alone */ - reg &= 0x30070000; /* Strip timing bits */ + mode &= 0xCFC3FFFF; /* Leave DMA bits alone */ + reg &= ~0xCFC3FFFF; /* Strip timing bits */ pci_write_config_dword(pdev, addr1, reg | mode); } @@ -423,8 +422,7 @@ { struct pci_dev *pdev = to_pci_dev(ap->host->dev); u32 addr1, addr2; - u32 reg; - u32 mode; + u32 reg, mode, mask; u8 fast; addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); @@ -436,11 +434,12 @@ fast |= 0x01; pci_write_config_byte(pdev, addr2, fast); + mask = adev->dma_mode < XFER_UDMA_0 ? 0x31C001FF : 0x303C0000; + pci_read_config_dword(pdev, addr1, ®); mode = hpt37x_find_mode(ap, adev->dma_mode); - mode |= 0x8000000; /* FIFO in MWDMA or UDMA */ - mode &= ~0xC0000000; /* Leave config bits alone */ - reg &= 0xC0000000; /* Strip timing bits */ + mode &= mask; + reg &= ~mask; pci_write_config_dword(pdev, addr1, reg | mode); } @@ -508,9 +507,8 @@ mode = hpt37x_find_mode(ap, adev->pio_mode); printk("Find mode for %d reports %X\n", adev->pio_mode, mode); - mode &= ~0x80000000; /* No FIFO in PIO */ - mode &= ~0x30070000; /* Leave config bits alone */ - reg &= 0x30070000; /* Strip timing bits */ + mode &= 0xCFC3FFFF; /* Leave DMA bits alone */ + reg &= ~0xCFC3FFFF; /* Strip timing bits */ pci_write_config_dword(pdev, addr1, reg | mode); } @@ -527,8 +525,7 @@ { struct pci_dev *pdev = to_pci_dev(ap->host->dev); u32 addr1, addr2; - u32 reg; - u32 mode; + u32 reg, mode, mask; u8 fast; addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); @@ -539,12 +536,13 @@ fast &= ~0x07; pci_write_config_byte(pdev, addr2, fast); + mask = adev->dma_mode < XFER_UDMA_0 ? 
0x31C001FF : 0x303C0000; + pci_read_config_dword(pdev, addr1, &reg); mode = hpt37x_find_mode(ap, adev->dma_mode); printk("Find mode for DMA %d reports %X\n", adev->dma_mode, mode); - mode &= ~0xC0000000; /* Leave config bits alone */ - mode |= 0x80000000; /* FIFO in MWDMA or UDMA */ - reg &= 0xC0000000; /* Strip timing bits */ + mode &= mask; + reg &= ~mask; pci_write_config_dword(pdev, addr1, reg | mode); } --- linux-ec2-2.6.31.orig/drivers/ata/pata_cmd64x.c +++ linux-ec2-2.6.31/drivers/ata/pata_cmd64x.c @@ -219,7 +219,7 @@ regU |= udma_data[adev->dma_mode - XFER_UDMA_0] << shift; /* Merge the control bits */ regU |= 1 << adev->devno; /* UDMA on */ - if (adev->dma_mode > 2) /* 15nS timing */ + if (adev->dma_mode > XFER_UDMA_2) /* 15nS timing */ regU |= 4 << adev->devno; } else { regU &= ~ (1 << adev->devno); /* UDMA off */ --- linux-ec2-2.6.31.orig/drivers/ata/ahci.c +++ linux-ec2-2.6.31/drivers/ata/ahci.c @@ -433,7 +433,8 @@ [board_ahci_sb600] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL | - AHCI_HFLAG_NO_MSI | AHCI_HFLAG_SECT255), + AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | + AHCI_HFLAG_SECT255), .flags = AHCI_FLAG_COMMON, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, @@ -2602,51 +2603,6 @@ } } -/* - * SB600 ahci controller on ASUS M2A-VM can't do 64bit DMA with older - * BIOS. The oldest version known to be broken is 0901 and working is - * 1501 which was released on 2007-10-26. Force 32bit DMA on anything - * older than 1501. Please read bko#9412 for more info. - */ -static bool ahci_asus_m2a_vm_32bit_only(struct pci_dev *pdev) -{ - static const struct dmi_system_id sysids[] = { - { - .ident = "ASUS M2A-VM", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, - "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "M2A-VM"), - }, - }, - { } - }; - const char *cutoff_mmdd = "10/26"; - const char *date; - int year; - - if (pdev->bus->number != 0 || pdev->devfn != PCI_DEVFN(0x12, 0) || - !dmi_check_system(sysids)) - return false; - - /* - * Argh.... both version and date are free form strings. - * Let's hope they're using the same date format across - * different versions.
- */ - date = dmi_get_system_info(DMI_BIOS_DATE); - year = dmi_get_year(DMI_BIOS_DATE); - if (date && strlen(date) >= 10 && date[2] == '/' && date[5] == '/' && - (year > 2007 || - (year == 2007 && strncmp(date, cutoff_mmdd, 5) >= 0))) - return false; - - dev_printk(KERN_WARNING, &pdev->dev, "ASUS M2A-VM: BIOS too old, " - "forcing 32bit DMA, update BIOS\n"); - - return true; -} - static bool ahci_broken_system_poweroff(struct pci_dev *pdev) { static const struct dmi_system_id broken_systems[] = { @@ -2857,12 +2813,8 @@ if (board_id == board_ahci_sb700 && pdev->revision >= 0x40) hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL; - /* apply ASUS M2A_VM quirk */ - if (ahci_asus_m2a_vm_32bit_only(pdev)) - hpriv->flags |= AHCI_HFLAG_32BIT_ONLY; - - if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) - pci_enable_msi(pdev); + if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev)) + pci_intx(pdev, 1); /* save initial config */ ahci_save_initial_config(pdev, hpriv); --- linux-ec2-2.6.31.orig/drivers/ata/libata-core.c +++ linux-ec2-2.6.31/drivers/ata/libata-core.c @@ -139,7 +139,7 @@ module_param_named(fua, libata_fua, int, 0444); MODULE_PARM_DESC(fua, "FUA support (0=off [default], 1=on)"); -static int ata_ignore_hpa; +static int ata_ignore_hpa = 1; module_param_named(ignore_hpa, ata_ignore_hpa, int, 0644); MODULE_PARM_DESC(ignore_hpa, "Ignore HPA limit (0=keep BIOS limits, 1=ignore limits, using full disk)"); @@ -709,7 +709,13 @@ head = tf->device & 0xf; sect = tf->lbal; - block = (cyl * dev->heads + head) * dev->sectors + sect; + if (!sect) { + ata_dev_printk(dev, KERN_WARNING, "device reported " + "invalid CHS sector 0\n"); + sect = 1; /* oh well */ + } + + block = (cyl * dev->heads + head) * dev->sectors + sect - 1; } return block; @@ -4295,6 +4301,7 @@ { "HTS541060G9SA00", "MB3OC60D", ATA_HORKAGE_NONCQ, }, { "HTS541080G9SA00", "MB4OC60D", ATA_HORKAGE_NONCQ, }, { "HTS541010G9SA00", "MBZOC60D", ATA_HORKAGE_NONCQ, }, + { "FUJITSU MHW2160BH PL", "0084001E", ATA_HORKAGE_NONCQ, }, /* devices which puke on READ_NATIVE_MAX */ { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, }, @@ -5002,12 +5009,14 @@ qc->flags |= ATA_QCFLAG_FAILED; if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) { - if (!ata_tag_internal(qc->tag)) { - /* always fill result TF for failed qc */ - fill_result_tf(qc); + /* always fill result TF for failed qc */ + fill_result_tf(qc); + + if (!ata_tag_internal(qc->tag)) ata_qc_schedule_eh(qc); - return; - } + else + __ata_qc_complete(qc); + return; } /* read result TF if requested */ --- linux-ec2-2.6.31.orig/drivers/ata/pata_sc1200.c +++ linux-ec2-2.6.31/drivers/ata/pata_sc1200.c @@ -235,8 +235,7 @@ .udma_mask = ATA_UDMA2, .port_ops = &sc1200_port_ops }; - /* Can't enable port 2 yet, see top comments */ - const struct ata_port_info *ppi[] = { &info, }; + const struct ata_port_info *ppi[] = { &info, NULL }; return ata_pci_sff_init_one(dev, ppi, &sc1200_sht, NULL); } --- linux-ec2-2.6.31.orig/drivers/ata/pata_hpt3x2n.c +++ linux-ec2-2.6.31/drivers/ata/pata_hpt3x2n.c @@ -8,7 +8,7 @@ * Copyright (C) 1999-2003 Andre Hedrick * Portions Copyright (C) 2001 Sun Microsystems, Inc. * Portions Copyright (C) 2003 Red Hat Inc - * Portions Copyright (C) 2005-2007 MontaVista Software, Inc. + * Portions Copyright (C) 2005-2009 MontaVista Software, Inc. 
* * * TODO @@ -25,7 +25,7 @@ #include #define DRV_NAME "pata_hpt3x2n" -#define DRV_VERSION "0.3.4" +#define DRV_VERSION "0.3.8" enum { HPT_PCI_FAST = (1 << 31), @@ -185,9 +185,8 @@ pci_read_config_dword(pdev, addr1, &reg); mode = hpt3x2n_find_mode(ap, adev->pio_mode); - mode &= ~0x8000000; /* No FIFO in PIO */ - mode &= ~0x30070000; /* Leave config bits alone */ - reg &= 0x30070000; /* Strip timing bits */ + mode &= 0xCFC3FFFF; /* Leave DMA bits alone */ + reg &= ~0xCFC3FFFF; /* Strip timing bits */ pci_write_config_dword(pdev, addr1, reg | mode); } @@ -204,8 +203,7 @@ { struct pci_dev *pdev = to_pci_dev(ap->host->dev); u32 addr1, addr2; - u32 reg; - u32 mode; + u32 reg, mode, mask; u8 fast; addr1 = 0x40 + 4 * (adev->devno + 2 * ap->port_no); @@ -216,11 +214,12 @@ fast &= ~0x07; pci_write_config_byte(pdev, addr2, fast); + mask = adev->dma_mode < XFER_UDMA_0 ? 0x31C001FF : 0x303C0000; + pci_read_config_dword(pdev, addr1, &reg); mode = hpt3x2n_find_mode(ap, adev->dma_mode); - mode |= 0x8000000; /* FIFO in MWDMA or UDMA */ - mode &= ~0xC0000000; /* Leave config bits alone */ - reg &= 0xC0000000; /* Strip timing bits */ + mode &= mask; + reg &= ~mask; pci_write_config_dword(pdev, addr1, reg | mode); } @@ -263,7 +262,7 @@ static void hpt3x2n_set_clock(struct ata_port *ap, int source) { - void __iomem *bmdma = ap->ioaddr.bmdma_addr; + void __iomem *bmdma = ap->ioaddr.bmdma_addr - ap->port_no * 8; /* Tristate the bus */ iowrite8(0x80, bmdma+0x73); @@ -273,9 +272,9 @@ iowrite8(source, bmdma+0x7B); iowrite8(0xC0, bmdma+0x79); - /* Reset state machines */ - iowrite8(0x37, bmdma+0x70); - iowrite8(0x37, bmdma+0x74); + /* Reset state machines, avoid enabling the disabled channels */ + iowrite8(ioread8(bmdma+0x70) | 0x32, bmdma+0x70); + iowrite8(ioread8(bmdma+0x74) | 0x32, bmdma+0x74); /* Complete reset */ iowrite8(0x00, bmdma+0x79); @@ -285,21 +284,10 @@ iowrite8(0x00, bmdma+0x77); } -/* Check if our partner interface is busy */ - -static int hpt3x2n_pair_idle(struct ata_port *ap) -{ - struct ata_host *host = ap->host; - struct ata_port *pair = host->ports[ap->port_no ^ 1]; - - if (pair->hsm_task_state == HSM_ST_IDLE) - return 1; - return 0; -} - static int hpt3x2n_use_dpll(struct ata_port *ap, int writing) { long flags = (long)ap->host->private_data; + /* See if we should use the DPLL */ if (writing) return USE_DPLL; /* Needed for write */ @@ -308,20 +296,35 @@ return 0; } +static int hpt3x2n_qc_defer(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ata_port *alt = ap->host->ports[ap->port_no ^ 1]; + int rc, flags = (long)ap->host->private_data; + int dpll = hpt3x2n_use_dpll(ap, qc->tf.flags & ATA_TFLAG_WRITE); + + /* First apply the usual rules */ + rc = ata_std_qc_defer(qc); + if (rc != 0) + return rc; + + if ((flags & USE_DPLL) != dpll && alt->qc_active) + return ATA_DEFER_PORT; + return 0; +} + static unsigned int hpt3x2n_qc_issue(struct ata_queued_cmd *qc) { - struct ata_taskfile *tf = &qc->tf; struct ata_port *ap = qc->ap; int flags = (long)ap->host->private_data; + int dpll = hpt3x2n_use_dpll(ap, qc->tf.flags & ATA_TFLAG_WRITE); - if (hpt3x2n_pair_idle(ap)) { - int dpll = hpt3x2n_use_dpll(ap, (tf->flags & ATA_TFLAG_WRITE)); - if ((flags & USE_DPLL) != dpll) { - if (dpll == 1) - hpt3x2n_set_clock(ap, 0x21); - else - hpt3x2n_set_clock(ap, 0x23); - } + if ((flags & USE_DPLL) != dpll) { + flags &= ~USE_DPLL; + flags |= dpll; + ap->host->private_data = (void *)(long)flags; + + hpt3x2n_set_clock(ap, dpll ?
0x21 : 0x23); } return ata_sff_qc_issue(qc); } @@ -338,6 +341,8 @@ .inherits = &ata_bmdma_port_ops, .bmdma_stop = hpt3x2n_bmdma_stop, + + .qc_defer = hpt3x2n_qc_defer, .qc_issue = hpt3x2n_qc_issue, .cable_detect = hpt3x2n_cable_detect, @@ -455,7 +460,7 @@ unsigned int f_low, f_high; int adjust; unsigned long iobase = pci_resource_start(dev, 4); - void *hpriv = NULL; + void *hpriv = (void *)USE_DPLL; int rc; rc = pcim_enable_device(dev); @@ -543,7 +548,7 @@ /* Set our private data up. We only need a few flags so we use it directly */ if (pci_mhz > 60) { - hpriv = (void *)PCI66; + hpriv = (void *)(PCI66 | USE_DPLL); /* * On HPT371N, if ATA clock is 66 MHz we must set bit 2 in * the MISC. register to stretch the UltraDMA Tss timing. --- linux-ec2-2.6.31.orig/drivers/ata/pata_amd.c +++ linux-ec2-2.6.31/drivers/ata/pata_amd.c @@ -307,6 +307,9 @@ limit |= ATA_MASK_PIO; if (!(limit & (ATA_MASK_MWDMA | ATA_MASK_UDMA))) limit |= ATA_MASK_MWDMA | ATA_MASK_UDMA; + /* PIO4, MWDMA2, UDMA2 should always be supported regardless of + cable detection result */ + limit |= ata_pack_xfermask(ATA_PIO4, ATA_MWDMA2, ATA_UDMA2); ata_port_printk(ap, KERN_DEBUG, "nv_mode_filter: 0x%lx&0x%lx->0x%lx, " "BIOS=0x%lx (0x%x) ACPI=0x%lx%s\n", --- linux-ec2-2.6.31.orig/drivers/ata/pata_via.c +++ linux-ec2-2.6.31/drivers/ata/pata_via.c @@ -111,7 +111,7 @@ { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, - { "vt6415", PCI_DEVICE_ID_VIA_6415, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, + { "vt6415", PCI_DEVICE_ID_VIA_6415, 0x00, 0xff, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, --- linux-ec2-2.6.31.orig/drivers/ata/libata-eh.c +++ linux-ec2-2.6.31/drivers/ata/libata-eh.c @@ -2541,14 +2541,14 @@ dev->pio_mode = XFER_PIO_0; dev->flags &= ~ATA_DFLAG_SLEEPING; - if (!ata_phys_link_offline(ata_dev_phys_link(dev))) { - /* apply class override */ - if (lflags & ATA_LFLAG_ASSUME_ATA) - classes[dev->devno] = ATA_DEV_ATA; - else if (lflags & ATA_LFLAG_ASSUME_SEMB) - classes[dev->devno] = ATA_DEV_SEMB_UNSUP; - } else - classes[dev->devno] = ATA_DEV_NONE; + if (ata_phys_link_offline(ata_dev_phys_link(dev))) + continue; + + /* apply class override */ + if (lflags & ATA_LFLAG_ASSUME_ATA) + classes[dev->devno] = ATA_DEV_ATA; + else if (lflags & ATA_LFLAG_ASSUME_SEMB) + classes[dev->devno] = ATA_DEV_SEMB_UNSUP; } /* record current link speed */ @@ -2581,34 +2581,48 @@ slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); - /* Make sure onlineness and classification result correspond. + /* + * Make sure onlineness and classification result correspond. * Hotplug could have happened during reset and some * controllers fail to wait while a drive is spinning up after * being hotplugged causing misdetection. By cross checking - * link onlineness and classification result, those conditions - * can be reliably detected and retried. + * link on/offlineness and classification result, those + * conditions can be reliably detected and retried. 
*/ nr_unknown = 0; ata_for_each_dev(dev, link, ALL) { - /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ - if (classes[dev->devno] == ATA_DEV_UNKNOWN) { - classes[dev->devno] = ATA_DEV_NONE; - if (ata_phys_link_online(ata_dev_phys_link(dev))) + if (ata_phys_link_online(ata_dev_phys_link(dev))) { + if (classes[dev->devno] == ATA_DEV_UNKNOWN) { + ata_dev_printk(dev, KERN_DEBUG, "link online " + "but device misclassifed\n"); + classes[dev->devno] = ATA_DEV_NONE; nr_unknown++; + } + } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { + if (ata_class_enabled(classes[dev->devno])) + ata_dev_printk(dev, KERN_DEBUG, "link offline, " + "clearing class %d to NONE\n", + classes[dev->devno]); + classes[dev->devno] = ATA_DEV_NONE; + } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { + ata_dev_printk(dev, KERN_DEBUG, "link status unknown, " + "clearing UNKNOWN to NONE\n"); + classes[dev->devno] = ATA_DEV_NONE; } } if (classify && nr_unknown) { if (try < max_tries) { ata_link_printk(link, KERN_WARNING, "link online but " - "device misclassified, retrying\n"); + "%d devices misclassified, retrying\n", + nr_unknown); failed_link = link; rc = -EAGAIN; goto fail; } ata_link_printk(link, KERN_WARNING, - "link online but device misclassified, " - "device detection might fail\n"); + "link online but %d devices misclassified, " + "device detection might fail\n", nr_unknown); } /* reset successful, schedule revalidation */ @@ -2835,12 +2849,14 @@ * device detection messages backwards. */ ata_for_each_dev(dev, link, ALL) { - if (!(new_mask & (1 << dev->devno)) || - dev->class == ATA_DEV_PMP) + if (!(new_mask & (1 << dev->devno))) continue; dev->class = ehc->classes[dev->devno]; + if (dev->class == ATA_DEV_PMP) + continue; + ehc->i.flags |= ATA_EHI_PRINTINFO; rc = ata_dev_configure(dev); ehc->i.flags &= ~ATA_EHI_PRINTINFO; --- linux-ec2-2.6.31.orig/drivers/message/fusion/mptbase.c +++ linux-ec2-2.6.31/drivers/message/fusion/mptbase.c @@ -1015,9 +1015,9 @@ { SGESimple64_t *pSge = (SGESimple64_t *) pAddr; pSge->Address.Low = cpu_to_le32 - (lower_32_bits((unsigned long)(dma_addr))); + (lower_32_bits(dma_addr)); pSge->Address.High = cpu_to_le32 - (upper_32_bits((unsigned long)dma_addr)); + (upper_32_bits(dma_addr)); pSge->FlagsLength = cpu_to_le32 ((flagslength | MPT_SGE_FLAGS_64_BIT_ADDRESSING)); } @@ -1038,8 +1038,8 @@ u32 tmp; pSge->Address.Low = cpu_to_le32 - (lower_32_bits((unsigned long)(dma_addr))); - tmp = (u32)(upper_32_bits((unsigned long)dma_addr)); + (lower_32_bits(dma_addr)); + tmp = (u32)(upper_32_bits(dma_addr)); /* * 1078 errata workaround for the 36GB limitation @@ -1101,7 +1101,7 @@ pChain->NextChainOffset = next; pChain->Address.Low = cpu_to_le32(tmp); - tmp = (u32)(upper_32_bits((unsigned long)dma_addr)); + tmp = (u32)(upper_32_bits(dma_addr)); pChain->Address.High = cpu_to_le32(tmp); } @@ -3266,6 +3266,16 @@ pfacts->IOCStatus = le16_to_cpu(pfacts->IOCStatus); pfacts->IOCLogInfo = le32_to_cpu(pfacts->IOCLogInfo); pfacts->MaxDevices = le16_to_cpu(pfacts->MaxDevices); + /* + * VMware emulation is broken, its PortFact's MaxDevices reports value + * programmed by IOC Init, so if you program IOC Init to 256 (which is 0, + * as that field is only 8 bit), it reports back 0 in port facts, instead + * of 256... And unfortunately using 256 triggers another bug in the + * code (parallel SCSI can have only 16 devices). 
+ */ + if (pfacts->MaxDevices == 0) { + pfacts->MaxDevices = 16; + } pfacts->PortSCSIID = le16_to_cpu(pfacts->PortSCSIID); pfacts->ProtocolFlags = le16_to_cpu(pfacts->ProtocolFlags); pfacts->MaxPostedCmdBuffers = le16_to_cpu(pfacts->MaxPostedCmdBuffers); --- linux-ec2-2.6.31.orig/drivers/media/common/tuners/tda18271-fe.c +++ linux-ec2-2.6.31/drivers/media/common/tuners/tda18271-fe.c @@ -595,13 +595,13 @@ case RF2: map[i].rf_a1 = (prog_cal[RF2] - prog_tab[RF2] - prog_cal[RF1] + prog_tab[RF1]) / - ((rf_freq[RF2] - rf_freq[RF1]) / 1000); + (s32)((rf_freq[RF2] - rf_freq[RF1]) / 1000); map[i].rf2 = rf_freq[RF2] / 1000; break; case RF3: map[i].rf_a2 = (prog_cal[RF3] - prog_tab[RF3] - prog_cal[RF2] + prog_tab[RF2]) / - ((rf_freq[RF3] - rf_freq[RF2]) / 1000); + (s32)((rf_freq[RF3] - rf_freq[RF2]) / 1000); map[i].rf_b2 = prog_cal[RF2] - prog_tab[RF2]; map[i].rf3 = rf_freq[RF3] / 1000; break; @@ -963,12 +963,12 @@ struct tda18271_std_map_item *map; char *mode; int ret; - u32 freq = params->frequency * 62500; + u32 freq = params->frequency * 125 * + ((params->mode == V4L2_TUNER_RADIO) ? 1 : 1000) / 2; priv->mode = TDA18271_ANALOG; if (params->mode == V4L2_TUNER_RADIO) { - freq = freq / 1000; map = &std_map->fm_radio; mode = "fm"; } else if (params->std & V4L2_STD_MN) { --- linux-ec2-2.6.31.orig/drivers/media/common/tuners/mxl5007t.c +++ linux-ec2-2.6.31/drivers/media/common/tuners/mxl5007t.c @@ -196,7 +196,7 @@ i = j = 0; while (reg_pair1[i].reg || reg_pair1[i].val) { - while (reg_pair2[j].reg || reg_pair2[j].reg) { + while (reg_pair2[j].reg || reg_pair2[j].val) { if (reg_pair1[i].reg != reg_pair2[j].reg) { j++; continue; --- linux-ec2-2.6.31.orig/drivers/media/video/s2255drv.c +++ linux-ec2-2.6.31/drivers/media/video/s2255drv.c @@ -598,11 +598,6 @@ buf = list_entry(dma_q->active.next, struct s2255_buffer, vb.queue); - if (!waitqueue_active(&buf->vb.done)) { - /* no one active */ - rc = -1; - goto unlock; - } list_del(&buf->vb.queue); do_gettimeofday(&buf->vb.ts); dprintk(100, "[%p/%d] wakeup\n", buf, buf->vb.i); --- linux-ec2-2.6.31.orig/drivers/media/video/v4l1-compat.c +++ linux-ec2-2.6.31/drivers/media/video/v4l1-compat.c @@ -565,10 +565,9 @@ break; } chan->norm = 0; - err = drv(file, VIDIOC_G_STD, &sid); - if (err < 0) - dprintk("VIDIOCGCHAN / VIDIOC_G_STD: %ld\n", err); - if (err == 0) { + /* Note: G_STD might not be present for radio receivers, + * so we should ignore any errors. */ + if (drv(file, VIDIOC_G_STD, &sid) == 0) { if (sid & V4L2_STD_PAL) chan->norm = VIDEO_MODE_PAL; if (sid & V4L2_STD_NTSC) @@ -777,10 +776,9 @@ tun->flags |= VIDEO_TUNER_SECAM; } - err = drv(file, VIDIOC_G_STD, &sid); - if (err < 0) - dprintk("VIDIOCGTUNER / VIDIOC_G_STD: %ld\n", err); - if (err == 0) { + /* Note: G_STD might not be present for radio receivers, + * so we should ignore any errors. 
*/ + if (drv(file, VIDIOC_G_STD, &sid) == 0) { if (sid & V4L2_STD_PAL) tun->mode = VIDEO_MODE_PAL; if (sid & V4L2_STD_NTSC) --- linux-ec2-2.6.31.orig/drivers/media/video/ov511.c +++ linux-ec2-2.6.31/drivers/media/video/ov511.c @@ -5878,7 +5878,7 @@ goto error; } - mutex_lock(&ov->lock); + mutex_unlock(&ov->lock); return 0; --- linux-ec2-2.6.31.orig/drivers/media/video/cx88/cx88.h +++ linux-ec2-2.6.31/drivers/media/video/cx88/cx88.h @@ -237,6 +237,7 @@ #define CX88_BOARD_TERRATEC_CINERGY_HT_PCI_MKII 79 #define CX88_BOARD_HAUPPAUGE_IRONLY 80 #define CX88_BOARD_WINFAST_DTV1800H 81 +#define CX88_BOARD_WINFAST_DTV2000H_J 82 enum cx88_itype { CX88_VMUX_COMPOSITE1 = 1, --- linux-ec2-2.6.31.orig/drivers/media/video/cx88/cx88-cards.c +++ linux-ec2-2.6.31/drivers/media/video/cx88/cx88-cards.c @@ -1283,6 +1283,51 @@ }, .mpeg = CX88_MPEG_DVB, }, + [CX88_BOARD_WINFAST_DTV2000H_J] = { + .name = "WinFast DTV2000 H rev. J", + .tuner_type = TUNER_PHILIPS_FMD1216ME_MK3, + .radio_type = UNSET, + .tuner_addr = ADDR_UNSET, + .radio_addr = ADDR_UNSET, + .tda9887_conf = TDA9887_PRESENT, + .input = {{ + .type = CX88_VMUX_TELEVISION, + .vmux = 0, + .gpio0 = 0x00017300, + .gpio1 = 0x00008207, + .gpio2 = 0x00000000, + .gpio3 = 0x02000000, + },{ + .type = CX88_VMUX_TELEVISION, + .vmux = 0, + .gpio0 = 0x00018300, + .gpio1 = 0x0000f207, + .gpio2 = 0x00017304, + .gpio3 = 0x02000000, + },{ + .type = CX88_VMUX_COMPOSITE1, + .vmux = 1, + .gpio0 = 0x00018301, + .gpio1 = 0x0000f207, + .gpio2 = 0x00017304, + .gpio3 = 0x02000000, + },{ + .type = CX88_VMUX_SVIDEO, + .vmux = 2, + .gpio0 = 0x00018301, + .gpio1 = 0x0000f207, + .gpio2 = 0x00017304, + .gpio3 = 0x02000000, + }}, + .radio = { + .type = CX88_RADIO, + .gpio0 = 0x00015702, + .gpio1 = 0x0000f207, + .gpio2 = 0x00015702, + .gpio3 = 0x02000000, + }, + .mpeg = CX88_MPEG_DVB, + }, [CX88_BOARD_GENIATECH_DVBS] = { .name = "Geniatech DVB-S", .tuner_type = TUNER_ABSENT, @@ -2282,6 +2327,10 @@ .subdevice = 0x665e, .card = CX88_BOARD_WINFAST_DTV2000H, },{ + .subvendor = 0x107d, + .subdevice = 0x6f2b, + .card = CX88_BOARD_WINFAST_DTV2000H_J, + },{ .subvendor = 0x18ac, .subdevice = 0xd800, /* FusionHDTV 3 Gold (original revision) */ .card = CX88_BOARD_DVICO_FUSIONHDTV_3_GOLD_Q, --- linux-ec2-2.6.31.orig/drivers/media/video/cx88/cx88-dvb.c +++ linux-ec2-2.6.31/drivers/media/video/cx88/cx88-dvb.c @@ -696,6 +696,7 @@ } break; case CX88_BOARD_WINFAST_DTV2000H: + case CX88_BOARD_WINFAST_DTV2000H_J: case CX88_BOARD_HAUPPAUGE_HVR1100: case CX88_BOARD_HAUPPAUGE_HVR1100LP: case CX88_BOARD_HAUPPAUGE_HVR1300: --- linux-ec2-2.6.31.orig/drivers/media/video/cx88/cx88-input.c +++ linux-ec2-2.6.31/drivers/media/video/cx88/cx88-input.c @@ -225,6 +225,7 @@ ir->sampling = 1; break; case CX88_BOARD_WINFAST_DTV2000H: + case CX88_BOARD_WINFAST_DTV2000H_J: case CX88_BOARD_WINFAST_DTV1800H: ir_codes = ir_codes_winfast; ir->gpio_addr = MO_GP0_IO; --- linux-ec2-2.6.31.orig/drivers/media/video/saa7134/saa7134-cards.c +++ linux-ec2-2.6.31/drivers/media/video/saa7134/saa7134-cards.c @@ -3373,6 +3373,7 @@ .tuner_config = 3, .mpeg = SAA7134_MPEG_DVB, .ts_type = SAA7134_MPEG_TS_SERIAL, + .ts_force_val = 1, .gpiomask = 0x0800100, /* GPIO 21 is an INPUT */ .inputs = {{ .name = name_tv, --- linux-ec2-2.6.31.orig/drivers/media/video/saa7134/saa7134-input.c +++ linux-ec2-2.6.31/drivers/media/video/saa7134/saa7134-input.c @@ -684,8 +684,6 @@ void saa7134_probe_i2c_ir(struct saa7134_dev *dev) { - struct i2c_board_info info; - struct IR_i2c_init_data init_data; const unsigned short addr_list[] = { 0x7a, 0x47, 0x71, 0x2d, 
I2C_CLIENT_END @@ -705,32 +703,32 @@ return; } - memset(&info, 0, sizeof(struct i2c_board_info)); - memset(&init_data, 0, sizeof(struct IR_i2c_init_data)); - strlcpy(info.type, "ir_video", I2C_NAME_SIZE); + memset(&dev->info, 0, sizeof(dev->info)); + memset(&dev->init_data, 0, sizeof(dev->init_data)); + strlcpy(dev->info.type, "ir_video", I2C_NAME_SIZE); switch (dev->board) { case SAA7134_BOARD_PINNACLE_PCTV_110i: case SAA7134_BOARD_PINNACLE_PCTV_310i: - init_data.name = "Pinnacle PCTV"; + dev->init_data.name = "Pinnacle PCTV"; if (pinnacle_remote == 0) { - init_data.get_key = get_key_pinnacle_color; - init_data.ir_codes = ir_codes_pinnacle_color; + dev->init_data.get_key = get_key_pinnacle_color; + dev->init_data.ir_codes = ir_codes_pinnacle_color; } else { - init_data.get_key = get_key_pinnacle_grey; - init_data.ir_codes = ir_codes_pinnacle_grey; + dev->init_data.get_key = get_key_pinnacle_grey; + dev->init_data.ir_codes = ir_codes_pinnacle_grey; } break; case SAA7134_BOARD_UPMOST_PURPLE_TV: - init_data.name = "Purple TV"; - init_data.get_key = get_key_purpletv; - init_data.ir_codes = ir_codes_purpletv; + dev->init_data.name = "Purple TV"; + dev->init_data.get_key = get_key_purpletv; + dev->init_data.ir_codes = ir_codes_purpletv; break; case SAA7134_BOARD_MSI_TVATANYWHERE_PLUS: - init_data.name = "MSI TV@nywhere Plus"; - init_data.get_key = get_key_msi_tvanywhere_plus; - init_data.ir_codes = ir_codes_msi_tvanywhere_plus; - info.addr = 0x30; + dev->init_data.name = "MSI TV@nywhere Plus"; + dev->init_data.get_key = get_key_msi_tvanywhere_plus; + dev->init_data.ir_codes = ir_codes_msi_tvanywhere_plus; + dev->info.addr = 0x30; /* MSI TV@nywhere Plus controller doesn't seem to respond to probes unless we read something from an existing device. Weird... @@ -741,9 +739,9 @@ (1 == rc) ? 
"yes" : "no"); break; case SAA7134_BOARD_HAUPPAUGE_HVR1110: - init_data.name = "HVR 1110"; - init_data.get_key = get_key_hvr1110; - init_data.ir_codes = ir_codes_hauppauge_new; + dev->init_data.name = "HVR 1110"; + dev->init_data.get_key = get_key_hvr1110; + dev->init_data.ir_codes = ir_codes_hauppauge_new; break; case SAA7134_BOARD_BEHOLD_607FM_MK3: case SAA7134_BOARD_BEHOLD_607FM_MK5: @@ -757,26 +755,26 @@ case SAA7134_BOARD_BEHOLD_M63: case SAA7134_BOARD_BEHOLD_M6_EXTRA: case SAA7134_BOARD_BEHOLD_H6: - init_data.name = "BeholdTV"; - init_data.get_key = get_key_beholdm6xx; - init_data.ir_codes = ir_codes_behold; + dev->init_data.name = "BeholdTV"; + dev->init_data.get_key = get_key_beholdm6xx; + dev->init_data.ir_codes = ir_codes_behold; break; case SAA7134_BOARD_AVERMEDIA_CARDBUS_501: case SAA7134_BOARD_AVERMEDIA_CARDBUS_506: - info.addr = 0x40; + dev->info.addr = 0x40; break; } - if (init_data.name) - info.platform_data = &init_data; + if (dev->init_data.name) + dev->info.platform_data = &dev->init_data; /* No need to probe if address is known */ - if (info.addr) { - i2c_new_device(&dev->i2c_adap, &info); + if (dev->info.addr) { + i2c_new_device(&dev->i2c_adap, &dev->info); return; } /* Address not known, fallback to probing */ - i2c_new_probed_device(&dev->i2c_adap, &info, addr_list); + i2c_new_probed_device(&dev->i2c_adap, &dev->info, addr_list); } static int saa7134_rc5_irq(struct saa7134_dev *dev) --- linux-ec2-2.6.31.orig/drivers/media/video/saa7134/saa7134.h +++ linux-ec2-2.6.31/drivers/media/video/saa7134/saa7134.h @@ -355,6 +355,7 @@ enum saa7134_mpeg_type mpeg; enum saa7134_mpeg_ts_type ts_type; unsigned int vid_port_opts; + unsigned int ts_force_val:1; }; #define card_has_radio(dev) (NULL != saa7134_boards[dev->board].radio.name) @@ -584,6 +585,10 @@ int nosignal; unsigned int insuspend; + /* I2C keyboard data */ + struct i2c_board_info info; + struct IR_i2c_init_data init_data; + /* SAA7134_MPEG_* */ struct saa7134_ts ts; struct saa7134_dmaqueue ts_q; --- linux-ec2-2.6.31.orig/drivers/media/video/saa7134/saa7134-ts.c +++ linux-ec2-2.6.31/drivers/media/video/saa7134/saa7134-ts.c @@ -262,11 +262,13 @@ switch (saa7134_boards[dev->board].ts_type) { case SAA7134_MPEG_TS_PARALLEL: saa_writeb(SAA7134_TS_SERIAL0, 0x40); - saa_writeb(SAA7134_TS_PARALLEL, 0xec); + saa_writeb(SAA7134_TS_PARALLEL, 0xec | + (saa7134_boards[dev->board].ts_force_val << 4)); break; case SAA7134_MPEG_TS_SERIAL: saa_writeb(SAA7134_TS_SERIAL0, 0xd8); - saa_writeb(SAA7134_TS_PARALLEL, 0x6c); + saa_writeb(SAA7134_TS_PARALLEL, 0x6c | + (saa7134_boards[dev->board].ts_force_val << 4)); saa_writeb(SAA7134_TS_PARALLEL_SERIAL, 0xbc); saa_writeb(SAA7134_TS_SERIAL1, 0x02); break; --- linux-ec2-2.6.31.orig/drivers/media/video/em28xx/em28xx-audio.c +++ linux-ec2-2.6.31/drivers/media/video/em28xx/em28xx-audio.c @@ -383,6 +383,11 @@ static int snd_em28xx_prepare(struct snd_pcm_substream *substream) { + struct em28xx *dev = snd_pcm_substream_chip(substream); + + dev->adev.hwptr_done_capture = 0; + dev->adev.capture_transfer_done = 0; + return 0; } --- linux-ec2-2.6.31.orig/drivers/media/video/em28xx/em28xx-cards.c +++ linux-ec2-2.6.31/drivers/media/video/em28xx/em28xx-cards.c @@ -2170,8 +2170,6 @@ /* ----------------------------------------------------------------------- */ void em28xx_register_i2c_ir(struct em28xx *dev) { - struct i2c_board_info info; - struct IR_i2c_init_data init_data; const unsigned short addr_list[] = { 0x30, 0x47, I2C_CLIENT_END }; @@ -2179,9 +2177,9 @@ if (disable_ir) return; - memset(&info, 0, 
sizeof(struct i2c_board_info)); - memset(&init_data, 0, sizeof(struct IR_i2c_init_data)); - strlcpy(info.type, "ir_video", I2C_NAME_SIZE); + memset(&dev->info, 0, sizeof(dev->info)); + memset(&dev->init_data, 0, sizeof(dev->init_data)); + strlcpy(dev->info.type, "ir_video", I2C_NAME_SIZE); /* detect & configure */ switch (dev->model) { @@ -2191,19 +2189,19 @@ break; case (EM2800_BOARD_TERRATEC_CINERGY_200): case (EM2820_BOARD_TERRATEC_CINERGY_250): - init_data.ir_codes = ir_codes_em_terratec; - init_data.get_key = em28xx_get_key_terratec; - init_data.name = "i2c IR (EM28XX Terratec)"; + dev->init_data.ir_codes = ir_codes_em_terratec; + dev->init_data.get_key = em28xx_get_key_terratec; + dev->init_data.name = "i2c IR (EM28XX Terratec)"; break; case (EM2820_BOARD_PINNACLE_USB_2): - init_data.ir_codes = ir_codes_pinnacle_grey; - init_data.get_key = em28xx_get_key_pinnacle_usb_grey; - init_data.name = "i2c IR (EM28XX Pinnacle PCTV)"; + dev->init_data.ir_codes = ir_codes_pinnacle_grey; + dev->init_data.get_key = em28xx_get_key_pinnacle_usb_grey; + dev->init_data.name = "i2c IR (EM28XX Pinnacle PCTV)"; break; case (EM2820_BOARD_HAUPPAUGE_WINTV_USB_2): - init_data.ir_codes = ir_codes_hauppauge_new; - init_data.get_key = em28xx_get_key_em_haup; - init_data.name = "i2c IR (EM2840 Hauppauge)"; + dev->init_data.ir_codes = ir_codes_hauppauge_new; + dev->init_data.get_key = em28xx_get_key_em_haup; + dev->init_data.name = "i2c IR (EM2840 Hauppauge)"; break; case (EM2820_BOARD_MSI_VOX_USB_2): break; @@ -2215,9 +2213,9 @@ break; } - if (init_data.name) - info.platform_data = &init_data; - i2c_new_probed_device(&dev->i2c_adap, &info, addr_list); + if (dev->init_data.name) + dev->info.platform_data = &dev->init_data; + i2c_new_probed_device(&dev->i2c_adap, &dev->info, addr_list); } void em28xx_card_setup(struct em28xx *dev) --- linux-ec2-2.6.31.orig/drivers/media/video/em28xx/em28xx.h +++ linux-ec2-2.6.31/drivers/media/video/em28xx/em28xx.h @@ -595,6 +595,10 @@ struct delayed_work sbutton_query_work; struct em28xx_dvb *dvb; + + /* I2C keyboard data */ + struct i2c_board_info info; + struct IR_i2c_init_data init_data; }; struct em28xx_ops { --- linux-ec2-2.6.31.orig/drivers/media/video/zc0301/zc0301_sensor.h +++ linux-ec2-2.6.31/drivers/media/video/zc0301/zc0301_sensor.h @@ -62,7 +62,6 @@ #define ZC0301_ID_TABLE \ static const struct usb_device_id zc0301_id_table[] = { \ { ZC0301_USB_DEVICE(0x046d, 0x08ae, 0xff), }, /* PAS202 */ \ - { ZC0301_USB_DEVICE(0x0ac8, 0x303b, 0xff), }, /* PB-0330 */ \ { } \ }; #else --- linux-ec2-2.6.31.orig/drivers/media/video/bt8xx/bttv-driver.c +++ linux-ec2-2.6.31/drivers/media/video/bt8xx/bttv-driver.c @@ -1299,7 +1299,7 @@ tvnorm = &bttv_tvnorms[norm]; - if (!memcmp(&bttv_tvnorms[btv->tvnorm].cropcap, &tvnorm->cropcap, + if (memcmp(&bttv_tvnorms[btv->tvnorm].cropcap, &tvnorm->cropcap, sizeof (tvnorm->cropcap))) { bttv_crop_reset(&btv->crop[0], norm); btv->crop[1] = btv->crop[0]; /* current = default */ @@ -3798,11 +3798,34 @@ if (!V4L2_FIELD_HAS_BOTH(item->vb.field) && (item->vb.queue.next != &btv->capture)) { item = list_entry(item->vb.queue.next, struct bttv_buffer, vb.queue); + /* Mike Isely - Only check + * and set up the bottom field in the logic + * below. Don't ever do the top field. This + * of course means that if we set up the + * bottom field in the above code we'll + * actually skip a field. But that's OK. + * Having processed only a single buffer this + * time, then the next time around the first + * available buffer should be for a top field.
+ * That will then cause us here to set up a + * top then a bottom field in the normal way. + * The alternative to this understanding is + * that we set up the second available buffer + * as a top field, but that's out of order + * since this driver always processes the top + * field first - the effect will be the two + * buffers being returned in the wrong order, + * with the second buffer also being delayed + * by one field time (owing to the fifo nature + * of videobuf). Worse still, we'll be stuck + * doing fields out of order now every time + * until something else causes a field to be + * dropped. By effectively forcing a field to + * drop this way then we always get back into + * sync within a single frame time. (Out of + * order fields can screw up deinterlacing + * algorithms.) */ if (!V4L2_FIELD_HAS_BOTH(item->vb.field)) { - if (NULL == set->top && - V4L2_FIELD_TOP == item->vb.field) { - set->top = item; - } if (NULL == set->bottom && V4L2_FIELD_BOTTOM == item->vb.field) { set->bottom = item; --- linux-ec2-2.6.31.orig/drivers/media/video/uvc/uvc_driver.c +++ linux-ec2-2.6.31/drivers/media/video/uvc/uvc_driver.c @@ -1983,6 +1983,15 @@ .bInterfaceProtocol = 0, .driver_info = UVC_QUIRK_PROBE_MINMAX | UVC_QUIRK_IGNORE_SELECTOR_UNIT }, + /* COMPAL JHL90 */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x064e, + .idProduct = 0xa115, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_MINMAX }, /* Generic USB Video Class */ { USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, 0) }, {} --- linux-ec2-2.6.31.orig/drivers/media/video/gspca/sonixj.c +++ linux-ec2-2.6.31/drivers/media/video/gspca/sonixj.c @@ -727,7 +727,7 @@ {0xa1, 0x21, 0x12, 0x05, 0x00, 0x00, 0x00, 0x10}, /* Outformat = rawRGB */ {0xa1, 0x21, 0x13, 0xb8, 0x00, 0x00, 0x00, 0x10}, /* init COM8 */ - {0xd1, 0x21, 0x00, 0x01, 0x74, 0x74, 0x00, 0x10}, + {0xd1, 0x21, 0x00, 0x01, 0x74, 0x92, 0x00, 0x10}, /* GAIN BLUE RED VREF */ {0xd1, 0x21, 0x04, 0x00, 0x7d, 0x62, 0x00, 0x10}, /* COM 1 BAVE GEAVE AECHH */ @@ -783,7 +783,7 @@ {0xc1, 0x21, 0x88, 0xaf, 0xc7, 0xdf, 0x00, 0x10}, /* gamma curve */ {0xc1, 0x21, 0x8b, 0x99, 0x99, 0xcf, 0x00, 0x10}, /* reserved */ {0xb1, 0x21, 0x92, 0x00, 0x00, 0x00, 0x00, 0x10}, /* DM_LNL/H */ - {0xb1, 0x21, 0xa1, 0x00, 0x00, 0x00, 0x00, 0x10}, + {0xa1, 0x21, 0xa1, 0x00, 0x00, 0x00, 0x00, 0x10}, /****** (some exchanges in the win trace) ******/ {0xa1, 0x21, 0x1e, 0x01, 0x00, 0x00, 0x00, 0x10}, /* MVFP */ /* bits[3..0]reserved */ @@ -1145,17 +1145,12 @@ reg_w1(gspca_dev, 0x01, 0x42); break; case SENSOR_OV7660: - reg_w1(gspca_dev, 0x01, 0x61); - reg_w1(gspca_dev, 0x17, 0x20); - reg_w1(gspca_dev, 0x01, 0x60); - reg_w1(gspca_dev, 0x01, 0x40); - break; case SENSOR_SP80708: reg_w1(gspca_dev, 0x01, 0x63); reg_w1(gspca_dev, 0x17, 0x20); reg_w1(gspca_dev, 0x01, 0x62); reg_w1(gspca_dev, 0x01, 0x42); - mdelay(100); + msleep(100); reg_w1(gspca_dev, 0x02, 0x62); break; /* case SENSOR_HV7131R: */ @@ -1624,6 +1619,8 @@ static void setinfrared(struct sd *sd) { + if (sd->gspca_dev.ctrl_dis & (1 << INFRARED_IDX)) + return; /*fixme: different sequence for StarCam Clip and StarCam 370i */ /* Clip */ i2c_w1(&sd->gspca_dev, 0x02, /* gpio */ @@ -1637,16 +1634,19 @@ if (gspca_dev->ctrl_dis & (1 << FREQ_IDX)) return; if (sd->sensor == SENSOR_OV7660) { + u8 com8; + + com8 = 0xdf; /* auto gain/wb/expo */ switch (sd->freq) { case 0: /* Banding filter disabled */ - i2c_w1(gspca_dev, 0x13, 0xdf); + i2c_w1(gspca_dev, 0x13, com8 
| 0x20); break; case 1: /* 50 hz */ - i2c_w1(gspca_dev, 0x13, 0xff); + i2c_w1(gspca_dev, 0x13, com8); i2c_w1(gspca_dev, 0x3b, 0x0a); break; case 2: /* 60 hz */ - i2c_w1(gspca_dev, 0x13, 0xff); + i2c_w1(gspca_dev, 0x13, com8); i2c_w1(gspca_dev, 0x3b, 0x02); break; } @@ -1796,12 +1796,6 @@ reg_w1(gspca_dev, 0x99, 0x60); break; case SENSOR_OV7660: - reg_w1(gspca_dev, 0x9a, 0x05); - if (sd->bridge == BRIDGE_SN9C105) - reg_w1(gspca_dev, 0x99, 0xff); - else - reg_w1(gspca_dev, 0x99, 0x5b); - break; case SENSOR_SP80708: reg_w1(gspca_dev, 0x9a, 0x05); reg_w1(gspca_dev, 0x99, 0x59); @@ -2325,18 +2319,19 @@ {USB_DEVICE(0x0c45, 0x607c), BSI(SN9C102P, HV7131R, 0x11)}, /* {USB_DEVICE(0x0c45, 0x607e), BSI(SN9C102P, OV7630, 0x??)}, */ {USB_DEVICE(0x0c45, 0x60c0), BSI(SN9C105, MI0360, 0x5d)}, -/* {USB_DEVICE(0x0c45, 0x60c8), BSI(SN9C105, OM6801, 0x??)}, */ +/* {USB_DEVICE(0x0c45, 0x60c8), BSI(SN9C105, OM6802, 0x??)}, */ /* {USB_DEVICE(0x0c45, 0x60cc), BSI(SN9C105, HV7131GP, 0x??)}, */ {USB_DEVICE(0x0c45, 0x60ec), BSI(SN9C105, MO4000, 0x21)}, /* {USB_DEVICE(0x0c45, 0x60ef), BSI(SN9C105, ICM105C, 0x??)}, */ /* {USB_DEVICE(0x0c45, 0x60fa), BSI(SN9C105, OV7648, 0x??)}, */ {USB_DEVICE(0x0c45, 0x60fb), BSI(SN9C105, OV7660, 0x21)}, - {USB_DEVICE(0x0c45, 0x60fc), BSI(SN9C105, HV7131R, 0x11)}, #if !defined CONFIG_USB_SN9C102 && !defined CONFIG_USB_SN9C102_MODULE + {USB_DEVICE(0x0c45, 0x60fc), BSI(SN9C105, HV7131R, 0x11)}, {USB_DEVICE(0x0c45, 0x60fe), BSI(SN9C105, OV7630, 0x21)}, #endif {USB_DEVICE(0x0c45, 0x6100), BSI(SN9C120, MI0360, 0x5d)}, /*sn9c128*/ -/* {USB_DEVICE(0x0c45, 0x6108), BSI(SN9C120, OM6801, 0x??)}, */ +/* {USB_DEVICE(0x0c45, 0x6102), BSI(SN9C120, PO2030N, ??)}, */ +/* {USB_DEVICE(0x0c45, 0x6108), BSI(SN9C120, OM6802, 0x21)}, */ {USB_DEVICE(0x0c45, 0x610a), BSI(SN9C120, OV7648, 0x21)}, /*sn9c128*/ {USB_DEVICE(0x0c45, 0x610b), BSI(SN9C120, OV7660, 0x21)}, /*sn9c128*/ {USB_DEVICE(0x0c45, 0x610c), BSI(SN9C120, HV7131R, 0x11)}, /*sn9c128*/ @@ -2352,6 +2347,7 @@ #if !defined CONFIG_USB_SN9C102 && !defined CONFIG_USB_SN9C102_MODULE {USB_DEVICE(0x0c45, 0x6130), BSI(SN9C120, MI0360, 0x5d)}, #endif +/* {USB_DEVICE(0x0c45, 0x6132), BSI(SN9C120, OV7670, 0x21)}, */ {USB_DEVICE(0x0c45, 0x6138), BSI(SN9C120, MO4000, 0x21)}, {USB_DEVICE(0x0c45, 0x613a), BSI(SN9C120, OV7648, 0x21)}, #if !defined CONFIG_USB_SN9C102 && !defined CONFIG_USB_SN9C102_MODULE @@ -2359,7 +2355,9 @@ #endif {USB_DEVICE(0x0c45, 0x613c), BSI(SN9C120, HV7131R, 0x11)}, {USB_DEVICE(0x0c45, 0x613e), BSI(SN9C120, OV7630, 0x21)}, - {USB_DEVICE(0x0c45, 0x6143), BSI(SN9C120, SP80708, 0x18)}, +/* {USB_DEVICE(0x0c45, 0x6142), BSI(SN9C120, PO2030N, ??)}, *sn9c120b*/ + {USB_DEVICE(0x0c45, 0x6143), BSI(SN9C120, SP80708, 0x18)}, /*sn9c120b*/ + {USB_DEVICE(0x0c45, 0x6148), BSI(SN9C120, OM6802, 0x21)}, /*sn9c120b*/ {} }; MODULE_DEVICE_TABLE(usb, device_table); --- linux-ec2-2.6.31.orig/drivers/media/video/gspca/ov519.c +++ linux-ec2-2.6.31/drivers/media/video/gspca/ov519.c @@ -3364,6 +3364,7 @@ {USB_DEVICE(0x041e, 0x4061), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x041e, 0x4064), .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED }, + {USB_DEVICE(0x041e, 0x4067), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x041e, 0x4068), .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED }, {USB_DEVICE(0x045e, 0x028c), .driver_info = BRIDGE_OV519 }, --- linux-ec2-2.6.31.orig/drivers/media/video/gspca/vc032x.c +++ linux-ec2-2.6.31/drivers/media/video/gspca/vc032x.c @@ -424,208 +424,92 @@ static const __u8 mi1310_socinitVGA_JPG[][4] = { {0xb0, 0x03, 0x19, 0xcc}, {0xb0, 0x04, 0x02, 
0xcc}, - {0xb3, 0x00, 0x24, 0xcc}, - {0xb3, 0x00, 0x25, 0xcc}, - {0xb3, 0x05, 0x01, 0xcc}, - {0xb3, 0x06, 0x03, 0xcc}, - {0xb3, 0x5c, 0x01, 0xcc}, + {0xb3, 0x00, 0x64, 0xcc}, + {0xb3, 0x00, 0x65, 0xcc}, + {0xb3, 0x05, 0x00, 0xcc}, + {0xb3, 0x06, 0x00, 0xcc}, {0xb3, 0x08, 0x01, 0xcc}, {0xb3, 0x09, 0x0c, 0xcc}, {0xb3, 0x34, 0x02, 0xcc}, {0xb3, 0x35, 0xdd, 0xcc}, + {0xb3, 0x02, 0x00, 0xcc}, {0xb3, 0x03, 0x0a, 0xcc}, - {0xb3, 0x04, 0x0d, 0xcc}, + {0xb3, 0x04, 0x05, 0xcc}, {0xb3, 0x20, 0x00, 0xcc}, {0xb3, 0x21, 0x00, 0xcc}, - {0xb3, 0x22, 0x01, 0xcc}, - {0xb3, 0x23, 0xe0, 0xcc}, + {0xb3, 0x22, 0x03, 0xcc}, + {0xb3, 0x23, 0xc0, 0xcc}, {0xb3, 0x14, 0x00, 0xcc}, {0xb3, 0x15, 0x00, 0xcc}, - {0xb3, 0x16, 0x02, 0xcc}, - {0xb3, 0x17, 0x7f, 0xcc}, - {0xb8, 0x01, 0x7d, 0xcc}, - {0xb8, 0x81, 0x09, 0xcc}, - {0xb8, 0x27, 0x20, 0xcc}, - {0xb8, 0x26, 0x80, 0xcc}, - {0xb3, 0x00, 0x25, 0xcc}, - {0xb8, 0x00, 0x13, 0xcc}, - {0xbc, 0x00, 0x71, 0xcc}, - {0xb8, 0x81, 0x01, 0xcc}, - {0xb8, 0x2c, 0x5a, 0xcc}, - {0xb8, 0x2d, 0xff, 0xcc}, - {0xb8, 0x2e, 0xee, 0xcc}, - {0xb8, 0x2f, 0xfb, 0xcc}, - {0xb8, 0x30, 0x52, 0xcc}, - {0xb8, 0x31, 0xf8, 0xcc}, - {0xb8, 0x32, 0xf1, 0xcc}, - {0xb8, 0x33, 0xff, 0xcc}, - {0xb8, 0x34, 0x54, 0xcc}, - {0xb8, 0x35, 0x00, 0xcc}, - {0xb8, 0x36, 0x00, 0xcc}, - {0xb8, 0x37, 0x00, 0xcc}, + {0xb3, 0x16, 0x04, 0xcc}, + {0xb3, 0x17, 0xff, 0xcc}, + {0xb3, 0x00, 0x65, 0xcc}, + {0xb8, 0x00, 0x00, 0xcc}, + {0xbc, 0x00, 0xd0, 0xcc}, + {0xbc, 0x01, 0x01, 0xcc}, + {0xf0, 0x00, 0x02, 0xbb}, + {0xc8, 0x9f, 0x0b, 0xbb}, + {0x5b, 0x00, 0x01, 0xbb}, + {0x2f, 0xde, 0x20, 0xbb}, {0xf0, 0x00, 0x00, 0xbb}, - {0x00, 0x01, 0x00, 0xdd}, - {0x0d, 0x00, 0x09, 0xbb}, - {0x0d, 0x00, 0x08, 0xbb}, + {0x20, 0x03, 0x02, 0xbb}, {0xf0, 0x00, 0x01, 0xbb}, - {0x00, 0x01, 0x00, 0xdd}, - {0x06, 0x00, 0x14, 0xbb}, - {0x3a, 0x10, 0x00, 0xbb}, - {0x00, 0x00, 0x10, 0xdd}, - {0x9b, 0x10, 0x00, 0xbb}, - {0x00, 0x00, 0x10, 0xdd}, + {0x05, 0x00, 0x07, 0xbb}, + {0x34, 0x00, 0x00, 0xbb}, + {0x35, 0xff, 0x00, 0xbb}, + {0xdc, 0x07, 0x02, 0xbb}, + {0xdd, 0x3c, 0x18, 0xbb}, + {0xde, 0x92, 0x6d, 0xbb}, + {0xdf, 0xcd, 0xb1, 0xbb}, + {0xe0, 0xff, 0xe7, 0xbb}, + {0x06, 0xf0, 0x0d, 0xbb}, + {0x06, 0x70, 0x0e, 0xbb}, + {0x4c, 0x00, 0x01, 0xbb}, + {0x4d, 0x00, 0x01, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, + {0x2e, 0x0c, 0x55, 0xbb}, + {0x21, 0xb6, 0x6e, 0xbb}, + {0x36, 0x30, 0x10, 0xbb}, + {0x37, 0x00, 0xc1, 0xbb}, {0xf0, 0x00, 0x00, 0xbb}, - {0x00, 0x01, 0x00, 0xdd}, - {0x2b, 0x00, 0x28, 0xbb}, - {0x2c, 0x00, 0x30, 0xbb}, - {0x2d, 0x00, 0x30, 0xbb}, - {0x2e, 0x00, 0x28, 0xbb}, - {0x41, 0x00, 0xd7, 0xbb}, - {0x09, 0x02, 0x3a, 0xbb}, - {0x0c, 0x00, 0x00, 0xbb}, - {0x20, 0x00, 0x00, 0xbb}, - {0x05, 0x00, 0x8c, 0xbb}, - {0x06, 0x00, 0x32, 0xbb}, - {0x07, 0x00, 0xc6, 0xbb}, - {0x08, 0x00, 0x19, 0xbb}, - {0x24, 0x80, 0x6f, 0xbb}, - {0xc8, 0x00, 0x0f, 0xbb}, - {0x20, 0x00, 0x0f, 0xbb}, + {0x07, 0x00, 0x84, 0xbb}, + {0x08, 0x02, 0x4a, 0xbb}, + {0x05, 0x01, 0x10, 0xbb}, + {0x06, 0x00, 0x39, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, + {0x58, 0x02, 0x67, 0xbb}, + {0x57, 0x02, 0x00, 0xbb}, + {0x5a, 0x02, 0x67, 0xbb}, + {0x59, 0x02, 0x00, 0xbb}, + {0x5c, 0x12, 0x0d, 0xbb}, + {0x5d, 0x16, 0x11, 0xbb}, + {0x39, 0x06, 0x18, 0xbb}, + {0x3a, 0x06, 0x18, 0xbb}, + {0x3b, 0x06, 0x18, 0xbb}, + {0x3c, 0x06, 0x18, 0xbb}, + {0x64, 0x7b, 0x5b, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, + {0x36, 0x30, 0x10, 0xbb}, + {0x37, 0x00, 0xc0, 0xbb}, + {0xbc, 0x0e, 0x00, 0xcc}, + {0xbc, 0x0f, 0x05, 0xcc}, + {0xbc, 0x10, 0xc0, 0xcc}, + {0xbc, 0x11, 0x03, 0xcc}, {0xb6, 0x00, 0x00, 0xcc}, {0xb6, 0x03, 
0x02, 0xcc}, {0xb6, 0x02, 0x80, 0xcc}, {0xb6, 0x05, 0x01, 0xcc}, {0xb6, 0x04, 0xe0, 0xcc}, - {0xb6, 0x12, 0x78, 0xcc}, + {0xb6, 0x12, 0xf8, 0xcc}, + {0xb6, 0x13, 0x25, 0xcc}, {0xb6, 0x18, 0x02, 0xcc}, {0xb6, 0x17, 0x58, 0xcc}, {0xb6, 0x16, 0x00, 0xcc}, {0xb6, 0x22, 0x12, 0xcc}, {0xb6, 0x23, 0x0b, 0xcc}, - {0xb3, 0x02, 0x02, 0xcc}, {0xbf, 0xc0, 0x39, 0xcc}, {0xbf, 0xc1, 0x04, 0xcc}, - {0xbf, 0xcc, 0x10, 0xcc}, - {0xb9, 0x12, 0x00, 0xcc}, - {0xb9, 0x13, 0x0a, 0xcc}, - {0xb9, 0x14, 0x0a, 0xcc}, - {0xb9, 0x15, 0x0a, 0xcc}, - {0xb9, 0x16, 0x0a, 0xcc}, - {0xb9, 0x18, 0x00, 0xcc}, - {0xb9, 0x19, 0x0f, 0xcc}, - {0xb9, 0x1a, 0x0f, 0xcc}, - {0xb9, 0x1b, 0x0f, 0xcc}, - {0xb9, 0x1c, 0x0f, 0xcc}, - {0xb8, 0x8e, 0x00, 0xcc}, - {0xb8, 0x8f, 0xff, 0xcc}, - {0xb3, 0x01, 0x41, 0xcc}, - {0x03, 0x03, 0xc0, 0xbb}, - {0x06, 0x00, 0x10, 0xbb}, - {0xb6, 0x12, 0xf8, 0xcc}, - {0xb8, 0x0c, 0x20, 0xcc}, - {0xb8, 0x0d, 0x70, 0xcc}, - {0xb6, 0x13, 0x13, 0xcc}, - {0x2f, 0x00, 0xC0, 0xbb}, - {0xb8, 0xa0, 0x12, 0xcc}, - {}, -}; -static const __u8 mi1310_socinitQVGA_JPG[][4] = { - {0xb0, 0x03, 0x19, 0xcc}, - {0xb0, 0x04, 0x02, 0xcc}, - {0xb3, 0x00, 0x24, 0xcc}, - {0xb3, 0x00, 0x25, 0xcc}, - {0xb3, 0x05, 0x01, 0xcc}, - {0xb3, 0x06, 0x03, 0xcc}, - {0xb3, 0x5c, 0x01, 0xcc}, - {0xb3, 0x08, 0x01, 0xcc}, - {0xb3, 0x09, 0x0c, 0xcc}, - {0xb3, 0x34, 0x02, 0xcc}, - {0xb3, 0x35, 0xdd, 0xcc}, - {0xb3, 0x03, 0x0a, 0xcc}, - {0xb3, 0x04, 0x0d, 0xcc}, - {0xb3, 0x20, 0x00, 0xcc}, - {0xb3, 0x21, 0x00, 0xcc}, - {0xb3, 0x22, 0x01, 0xcc}, - {0xb3, 0x23, 0xe0, 0xcc}, - {0xb3, 0x14, 0x00, 0xcc}, - {0xb3, 0x15, 0x00, 0xcc}, - {0xb3, 0x16, 0x02, 0xcc}, - {0xb3, 0x17, 0x7f, 0xcc}, - {0xb8, 0x01, 0x7d, 0xcc}, - {0xb8, 0x81, 0x09, 0xcc}, - {0xb8, 0x27, 0x20, 0xcc}, - {0xb8, 0x26, 0x80, 0xcc}, - {0xb3, 0x00, 0x25, 0xcc}, - {0xb8, 0x00, 0x13, 0xcc}, - {0xbc, 0x00, 0xd1, 0xcc}, - {0xb8, 0x81, 0x01, 0xcc}, - {0xb8, 0x2c, 0x5a, 0xcc}, - {0xb8, 0x2d, 0xff, 0xcc}, - {0xb8, 0x2e, 0xee, 0xcc}, - {0xb8, 0x2f, 0xfb, 0xcc}, - {0xb8, 0x30, 0x52, 0xcc}, - {0xb8, 0x31, 0xf8, 0xcc}, - {0xb8, 0x32, 0xf1, 0xcc}, - {0xb8, 0x33, 0xff, 0xcc}, - {0xb8, 0x34, 0x54, 0xcc}, - {0xb8, 0x35, 0x00, 0xcc}, - {0xb8, 0x36, 0x00, 0xcc}, - {0xb8, 0x37, 0x00, 0xcc}, - {0xf0, 0x00, 0x00, 0xbb}, - {0x00, 0x01, 0x00, 0xdd}, - {0x0d, 0x00, 0x09, 0xbb}, - {0x0d, 0x00, 0x08, 0xbb}, - {0xf0, 0x00, 0x01, 0xbb}, - {0x00, 0x01, 0x00, 0xdd}, - {0x06, 0x00, 0x14, 0xbb}, - {0x3a, 0x10, 0x00, 0xbb}, - {0x00, 0x00, 0x10, 0xdd}, - {0x9b, 0x10, 0x00, 0xbb}, - {0x00, 0x00, 0x10, 0xdd}, - {0xf0, 0x00, 0x00, 0xbb}, - {0x00, 0x01, 0x00, 0xdd}, - {0x2b, 0x00, 0x28, 0xbb}, - {0x2c, 0x00, 0x30, 0xbb}, - {0x2d, 0x00, 0x30, 0xbb}, - {0x2e, 0x00, 0x28, 0xbb}, - {0x41, 0x00, 0xd7, 0xbb}, - {0x09, 0x02, 0x3a, 0xbb}, - {0x0c, 0x00, 0x00, 0xbb}, - {0x20, 0x00, 0x00, 0xbb}, - {0x05, 0x00, 0x8c, 0xbb}, - {0x06, 0x00, 0x32, 0xbb}, - {0x07, 0x00, 0xc6, 0xbb}, - {0x08, 0x00, 0x19, 0xbb}, - {0x24, 0x80, 0x6f, 0xbb}, - {0xc8, 0x00, 0x0f, 0xbb}, - {0x20, 0x00, 0x0f, 0xbb}, - {0xb6, 0x00, 0x00, 0xcc}, - {0xb6, 0x03, 0x01, 0xcc}, - {0xb6, 0x02, 0x40, 0xcc}, - {0xb6, 0x05, 0x00, 0xcc}, - {0xb6, 0x04, 0xf0, 0xcc}, - {0xb6, 0x12, 0x78, 0xcc}, - {0xb6, 0x18, 0x00, 0xcc}, - {0xb6, 0x17, 0x96, 0xcc}, - {0xb6, 0x16, 0x00, 0xcc}, - {0xb6, 0x22, 0x12, 0xcc}, - {0xb6, 0x23, 0x0b, 0xcc}, - {0xb3, 0x02, 0x02, 0xcc}, - {0xbf, 0xc0, 0x39, 0xcc}, - {0xbf, 0xc1, 0x04, 0xcc}, - {0xbf, 0xcc, 0x10, 0xcc}, - {0xb9, 0x12, 0x00, 0xcc}, - {0xb9, 0x13, 0x0a, 0xcc}, - {0xb9, 0x14, 0x0a, 0xcc}, - {0xb9, 0x15, 0x0a, 0xcc}, - {0xb9, 0x16, 0x0a, 0xcc}, 
- {0xb9, 0x18, 0x00, 0xcc}, - {0xb9, 0x19, 0x0f, 0xcc}, - {0xb9, 0x1a, 0x0f, 0xcc}, - {0xb9, 0x1b, 0x0f, 0xcc}, - {0xb9, 0x1c, 0x0f, 0xcc}, - {0xb8, 0x8e, 0x00, 0xcc}, - {0xb8, 0x8f, 0xff, 0xcc}, + {0xbf, 0xcc, 0x00, 0xcc}, {0xbc, 0x02, 0x18, 0xcc}, {0xbc, 0x03, 0x50, 0xcc}, {0xbc, 0x04, 0x18, 0xcc}, @@ -636,15 +520,130 @@ {0xbc, 0x0a, 0x10, 0xcc}, {0xbc, 0x0b, 0x00, 0xcc}, {0xbc, 0x0c, 0x00, 0xcc}, + {0xb3, 0x5c, 0x01, 0xcc}, + {0xf0, 0x00, 0x01, 0xbb}, + {0x80, 0x00, 0x03, 0xbb}, + {0x81, 0xc7, 0x14, 0xbb}, + {0x82, 0xeb, 0xe8, 0xbb}, + {0x83, 0xfe, 0xf4, 0xbb}, + {0x84, 0xcd, 0x10, 0xbb}, + {0x85, 0xf3, 0xee, 0xbb}, + {0x86, 0xff, 0xf1, 0xbb}, + {0x87, 0xcd, 0x10, 0xbb}, + {0x88, 0xf3, 0xee, 0xbb}, + {0x89, 0x01, 0xf1, 0xbb}, + {0x8a, 0xe5, 0x17, 0xbb}, + {0x8b, 0xe8, 0xe2, 0xbb}, + {0x8c, 0xf7, 0xed, 0xbb}, + {0x8d, 0x00, 0xff, 0xbb}, + {0x8e, 0xec, 0x10, 0xbb}, + {0x8f, 0xf0, 0xed, 0xbb}, + {0x90, 0xf9, 0xf2, 0xbb}, + {0x91, 0x00, 0x00, 0xbb}, + {0x92, 0xe9, 0x0d, 0xbb}, + {0x93, 0xf4, 0xf2, 0xbb}, + {0x94, 0xfb, 0xf5, 0xbb}, + {0x95, 0x00, 0xff, 0xbb}, + {0xb6, 0x0f, 0x08, 0xbb}, + {0xb7, 0x3d, 0x16, 0xbb}, + {0xb8, 0x0c, 0x04, 0xbb}, + {0xb9, 0x1c, 0x07, 0xbb}, + {0xba, 0x0a, 0x03, 0xbb}, + {0xbb, 0x1b, 0x09, 0xbb}, + {0xbc, 0x17, 0x0d, 0xbb}, + {0xbd, 0x23, 0x1d, 0xbb}, + {0xbe, 0x00, 0x28, 0xbb}, + {0xbf, 0x11, 0x09, 0xbb}, + {0xc0, 0x16, 0x15, 0xbb}, + {0xc1, 0x00, 0x1b, 0xbb}, + {0xc2, 0x0e, 0x07, 0xbb}, + {0xc3, 0x14, 0x10, 0xbb}, + {0xc4, 0x00, 0x17, 0xbb}, + {0x06, 0x74, 0x8e, 0xbb}, + {0xf0, 0x00, 0x01, 0xbb}, + {0x06, 0xf4, 0x8e, 0xbb}, + {0x00, 0x00, 0x50, 0xdd}, + {0x06, 0x74, 0x8e, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, + {0x24, 0x50, 0x20, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, + {0x34, 0x0c, 0x50, 0xbb}, {0xb3, 0x01, 0x41, 0xcc}, + {0xf0, 0x00, 0x00, 0xbb}, + {0x03, 0x03, 0xc0, 0xbb}, + {}, +}; +static const __u8 mi1310_socinitQVGA_JPG[][4] = { + {0xb0, 0x03, 0x19, 0xcc}, {0xb0, 0x04, 0x02, 0xcc}, + {0xb3, 0x00, 0x64, 0xcc}, {0xb3, 0x00, 0x65, 0xcc}, + {0xb3, 0x05, 0x00, 0xcc}, {0xb3, 0x06, 0x00, 0xcc}, + {0xb3, 0x08, 0x01, 0xcc}, {0xb3, 0x09, 0x0c, 0xcc}, + {0xb3, 0x34, 0x02, 0xcc}, {0xb3, 0x35, 0xdd, 0xcc}, + {0xb3, 0x02, 0x00, 0xcc}, {0xb3, 0x03, 0x0a, 0xcc}, + {0xb3, 0x04, 0x05, 0xcc}, {0xb3, 0x20, 0x00, 0xcc}, + {0xb3, 0x21, 0x00, 0xcc}, {0xb3, 0x22, 0x03, 0xcc}, + {0xb3, 0x23, 0xc0, 0xcc}, {0xb3, 0x14, 0x00, 0xcc}, + {0xb3, 0x15, 0x00, 0xcc}, {0xb3, 0x16, 0x04, 0xcc}, + {0xb3, 0x17, 0xff, 0xcc}, {0xb3, 0x00, 0x65, 0xcc}, + {0xb8, 0x00, 0x00, 0xcc}, {0xbc, 0x00, 0xf0, 0xcc}, + {0xbc, 0x01, 0x01, 0xcc}, {0xf0, 0x00, 0x02, 0xbb}, + {0xc8, 0x9f, 0x0b, 0xbb}, {0x5b, 0x00, 0x01, 0xbb}, + {0x2f, 0xde, 0x20, 0xbb}, {0xf0, 0x00, 0x00, 0xbb}, + {0x20, 0x03, 0x02, 0xbb}, {0xf0, 0x00, 0x01, 0xbb}, + {0x05, 0x00, 0x07, 0xbb}, {0x34, 0x00, 0x00, 0xbb}, + {0x35, 0xff, 0x00, 0xbb}, {0xdc, 0x07, 0x02, 0xbb}, + {0xdd, 0x3c, 0x18, 0xbb}, {0xde, 0x92, 0x6d, 0xbb}, + {0xdf, 0xcd, 0xb1, 0xbb}, {0xe0, 0xff, 0xe7, 0xbb}, + {0x06, 0xf0, 0x0d, 0xbb}, {0x06, 0x70, 0x0e, 0xbb}, + {0x4c, 0x00, 0x01, 0xbb}, {0x4d, 0x00, 0x01, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, {0x2e, 0x0c, 0x55, 0xbb}, + {0x21, 0xb6, 0x6e, 0xbb}, {0x36, 0x30, 0x10, 0xbb}, + {0x37, 0x00, 0xc1, 0xbb}, {0xf0, 0x00, 0x00, 0xbb}, + {0x07, 0x00, 0x84, 0xbb}, {0x08, 0x02, 0x4a, 0xbb}, + {0x05, 0x01, 0x10, 0xbb}, {0x06, 0x00, 0x39, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, {0x58, 0x02, 0x67, 0xbb}, + {0x57, 0x02, 0x00, 0xbb}, {0x5a, 0x02, 0x67, 0xbb}, + {0x59, 0x02, 0x00, 0xbb}, {0x5c, 0x12, 0x0d, 0xbb}, + {0x5d, 0x16, 0x11, 0xbb}, {0x39, 0x06, 
0x18, 0xbb}, + {0x3a, 0x06, 0x18, 0xbb}, {0x3b, 0x06, 0x18, 0xbb}, + {0x3c, 0x06, 0x18, 0xbb}, {0x64, 0x7b, 0x5b, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, {0x36, 0x30, 0x10, 0xbb}, + {0x37, 0x00, 0xc0, 0xbb}, {0xbc, 0x0e, 0x00, 0xcc}, + {0xbc, 0x0f, 0x05, 0xcc}, {0xbc, 0x10, 0xc0, 0xcc}, + {0xbc, 0x11, 0x03, 0xcc}, {0xb6, 0x00, 0x00, 0xcc}, + {0xb6, 0x03, 0x01, 0xcc}, {0xb6, 0x02, 0x40, 0xcc}, + {0xb6, 0x05, 0x00, 0xcc}, {0xb6, 0x04, 0xf0, 0xcc}, + {0xb6, 0x12, 0xf8, 0xcc}, {0xb6, 0x13, 0x25, 0xcc}, + {0xb6, 0x18, 0x00, 0xcc}, {0xb6, 0x17, 0x96, 0xcc}, + {0xb6, 0x16, 0x00, 0xcc}, {0xb6, 0x22, 0x12, 0xcc}, + {0xb6, 0x23, 0x0b, 0xcc}, {0xbf, 0xc0, 0x39, 0xcc}, + {0xbf, 0xc1, 0x04, 0xcc}, {0xbf, 0xcc, 0x00, 0xcc}, + {0xb3, 0x5c, 0x01, 0xcc}, {0xf0, 0x00, 0x01, 0xbb}, + {0x80, 0x00, 0x03, 0xbb}, {0x81, 0xc7, 0x14, 0xbb}, + {0x82, 0xeb, 0xe8, 0xbb}, {0x83, 0xfe, 0xf4, 0xbb}, + {0x84, 0xcd, 0x10, 0xbb}, {0x85, 0xf3, 0xee, 0xbb}, + {0x86, 0xff, 0xf1, 0xbb}, {0x87, 0xcd, 0x10, 0xbb}, + {0x88, 0xf3, 0xee, 0xbb}, {0x89, 0x01, 0xf1, 0xbb}, + {0x8a, 0xe5, 0x17, 0xbb}, {0x8b, 0xe8, 0xe2, 0xbb}, + {0x8c, 0xf7, 0xed, 0xbb}, {0x8d, 0x00, 0xff, 0xbb}, + {0x8e, 0xec, 0x10, 0xbb}, {0x8f, 0xf0, 0xed, 0xbb}, + {0x90, 0xf9, 0xf2, 0xbb}, {0x91, 0x00, 0x00, 0xbb}, + {0x92, 0xe9, 0x0d, 0xbb}, {0x93, 0xf4, 0xf2, 0xbb}, + {0x94, 0xfb, 0xf5, 0xbb}, {0x95, 0x00, 0xff, 0xbb}, + {0xb6, 0x0f, 0x08, 0xbb}, {0xb7, 0x3d, 0x16, 0xbb}, + {0xb8, 0x0c, 0x04, 0xbb}, {0xb9, 0x1c, 0x07, 0xbb}, + {0xba, 0x0a, 0x03, 0xbb}, {0xbb, 0x1b, 0x09, 0xbb}, + {0xbc, 0x17, 0x0d, 0xbb}, {0xbd, 0x23, 0x1d, 0xbb}, + {0xbe, 0x00, 0x28, 0xbb}, {0xbf, 0x11, 0x09, 0xbb}, + {0xc0, 0x16, 0x15, 0xbb}, {0xc1, 0x00, 0x1b, 0xbb}, + {0xc2, 0x0e, 0x07, 0xbb}, {0xc3, 0x14, 0x10, 0xbb}, + {0xc4, 0x00, 0x17, 0xbb}, {0x06, 0x74, 0x8e, 0xbb}, + {0xf0, 0x00, 0x01, 0xbb}, {0x06, 0xf4, 0x8e, 0xbb}, + {0x00, 0x00, 0x50, 0xdd}, {0x06, 0x74, 0x8e, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, {0x24, 0x50, 0x20, 0xbb}, + {0xf0, 0x00, 0x02, 0xbb}, {0x34, 0x0c, 0x50, 0xbb}, + {0xb3, 0x01, 0x41, 0xcc}, {0xf0, 0x00, 0x00, 0xbb}, {0x03, 0x03, 0xc0, 0xbb}, - {0x06, 0x00, 0x10, 0xbb}, - {0xb6, 0x12, 0xf8, 0xcc}, - {0xb8, 0x0c, 0x20, 0xcc}, - {0xb8, 0x0d, 0x70, 0xcc}, - {0xb6, 0x13, 0x13, 0xcc}, - {0x2f, 0x00, 0xC0, 0xbb}, - {0xb8, 0xa0, 0x12, 0xcc}, {}, }; static const u8 mi1310_soc_InitSXGA_JPG[][4] = { @@ -2514,7 +2513,8 @@ break; case SENSOR_MI1310_SOC: cam->cam_mode = vc0323_mode; - cam->nmodes = ARRAY_SIZE(vc0323_mode); + /* TODO: 1280x1024 resolution setting is incorrect, so don't export it now*/ + cam->nmodes = ARRAY_SIZE(vc0323_mode) - 1; break; case SENSOR_MI1320_SOC: cam->cam_mode = bi_mode; @@ -2737,15 +2737,20 @@ put_tab_to_reg(gspca_dev, MatrixT, 9, 0xb82c); /* set the led on 0x0892 0x0896 */ - if (sd->sensor != SENSOR_PO1200) { - reg_w(gspca_dev->dev, 0x89, 0xffff, 0xfdff); + if (sd->sensor == SENSOR_PO1200) { + setsharpness(gspca_dev); + sethvflip(gspca_dev); + reg_w(gspca_dev->dev, 0x89, 0x0400, 0x1415); + } else if (sd->sensor == SENSOR_MI1310_SOC) { + reg_w(gspca_dev->dev, 0x89, 0x058c, 0x0000); msleep(100); sethvflip(gspca_dev); setlightfreq(gspca_dev); } else { - setsharpness(gspca_dev); + reg_w(gspca_dev->dev, 0x89, 0xffff, 0xfdff); + msleep(100); sethvflip(gspca_dev); - reg_w(gspca_dev->dev, 0x89, 0x0400, 0x1415); + setlightfreq(gspca_dev); } } return 0; @@ -2754,8 +2759,12 @@ static void sd_stopN(struct gspca_dev *gspca_dev) { struct usb_device *dev = gspca_dev->dev; + struct sd *sd = (struct sd *) gspca_dev; - reg_w(dev, 0x89, 0xffff, 0xffff); + if( sd->sensor == 
SENSOR_MI1310_SOC) + reg_w(dev, 0x89, 0x058c, 0x00ff); + else + reg_w(dev, 0x89, 0xffff, 0xffff); reg_w(dev, 0xa0, 0x01, 0xb301); reg_w(dev, 0xa0, 0x09, 0xb003); } @@ -2764,10 +2773,14 @@ static void sd_stop0(struct gspca_dev *gspca_dev) { struct usb_device *dev = gspca_dev->dev; + struct sd *sd = (struct sd *) gspca_dev; if (!gspca_dev->present) return; - reg_w(dev, 0x89, 0xffff, 0xffff); + if( sd->sensor == SENSOR_MI1310_SOC) + reg_w(dev, 0x89, 0x058c, 0x00ff); + else + reg_w(dev, 0x89, 0xffff, 0xffff); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, --- linux-ec2-2.6.31.orig/drivers/media/video/gspca/m5602/m5602_s5k4aa.c +++ linux-ec2-2.6.31/drivers/media/video/gspca/m5602/m5602_s5k4aa.c @@ -35,12 +35,25 @@ const struct dmi_system_id s5k4aa_vflip_dmi_table[] = { { + .ident = "BRUNEINIT", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "BRUNENIT"), + DMI_MATCH(DMI_PRODUCT_NAME, "BRUNENIT"), + DMI_MATCH(DMI_BOARD_VERSION, "00030D0000000001") + } + }, { .ident = "Fujitsu-Siemens Amilo Xa 2528", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Xa 2528") } }, { + .ident = "Fujitsu-Siemens Amilo Xi 2528", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Xi 2528") + } + }, { .ident = "Fujitsu-Siemens Amilo Xi 2550", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), @@ -51,6 +64,13 @@ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International"), DMI_MATCH(DMI_PRODUCT_NAME, "GX700"), + DMI_MATCH(DMI_BIOS_DATE, "12/02/2008") + } + }, { + .ident = "MSI GX700", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International"), + DMI_MATCH(DMI_PRODUCT_NAME, "GX700"), DMI_MATCH(DMI_BIOS_DATE, "07/26/2007") } }, { --- linux-ec2-2.6.31.orig/drivers/media/video/sn9c102/sn9c102_devtable.h +++ linux-ec2-2.6.31/drivers/media/video/sn9c102/sn9c102_devtable.h @@ -123,8 +123,8 @@ { SN9C102_USB_DEVICE(0x0c45, 0x613b, BRIDGE_SN9C120), }, #if !defined CONFIG_USB_GSPCA && !defined CONFIG_USB_GSPCA_MODULE { SN9C102_USB_DEVICE(0x0c45, 0x613c, BRIDGE_SN9C120), }, -#endif { SN9C102_USB_DEVICE(0x0c45, 0x613e, BRIDGE_SN9C120), }, +#endif { } }; --- linux-ec2-2.6.31.orig/drivers/media/dvb/siano/smsusb.c +++ linux-ec2-2.6.31/drivers/media/dvb/siano/smsusb.c @@ -529,6 +529,12 @@ .driver_info = SMS1XXX_BOARD_SIANO_NICE }, { USB_DEVICE(0x187f, 0x0301), .driver_info = SMS1XXX_BOARD_SIANO_VENICE }, + { USB_DEVICE(0x2040, 0xb900), + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, + { USB_DEVICE(0x2040, 0xb910), + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, + { USB_DEVICE(0x2040, 0xc000), + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, { } /* Terminating entry */ }; --- linux-ec2-2.6.31.orig/drivers/media/dvb/frontends/dib7000p.c +++ linux-ec2-2.6.31/drivers/media/dvb/frontends/dib7000p.c @@ -1344,6 +1344,11 @@ if (dib7000p_identify(st) != 0) goto error; + /* FIXME: make sure the dev.parent field is initialized, or else + request_firmware() will hit an OOPS (this should be moved somewhere + more common) */ + st->i2c_master.gated_tuner_i2c_adap.dev.parent = i2c_adap->dev.parent; + dibx000_init_i2c_master(&st->i2c_master, DIB7000P, st->i2c_adap, st->i2c_addr); dib7000p_demod_reset(st); --- linux-ec2-2.6.31.orig/drivers/media/dvb/dvb-usb/cinergyT2-fe.c +++ linux-ec2-2.6.31/drivers/media/dvb/dvb-usb/cinergyT2-fe.c @@ -275,6 +275,7 @@ param.tps = cpu_to_le16(compute_tps(fep)); param.freq = cpu_to_le32(fep->frequency / 1000); param.bandwidth = 8 - fep->u.ofdm.bandwidth - BANDWIDTH_8_MHZ; + param.flags = 0; 
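The added "param.flags = 0;" above matters because param is a stack structure handed verbatim to the device, so any member that is never assigned would carry stack garbage onto the wire. A standalone sketch of the same fix expressed as zero-at-declaration (the struct here is an illustrative stand-in, not the driver's real message type):

#include <stdio.h>

struct set_params_msg {			/* illustrative stand-in */
	unsigned short tps;
	unsigned int freq;
	unsigned char bandwidth;
	unsigned char flags;
};

int main(void)
{
	struct set_params_msg param = { 0 };	/* every member defined */

	param.tps = 0x1f2e;			/* made-up values */
	param.freq = 474000;
	param.bandwidth = 0;
	/* param.flags is a guaranteed 0, not leftover stack bytes */
	printf("flags=%u\n", param.flags);
	return 0;
}

Either pattern works; the patch's single assignment is the minimal change, while zero-initializing at declaration also protects any fields added later.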
err = dvb_usb_generic_rw(state->d, (char *)¶m, sizeof(param), --- linux-ec2-2.6.31.orig/drivers/media/dvb/dvb-usb/cxusb.c +++ linux-ec2-2.6.31/drivers/media/dvb/dvb-usb/cxusb.c @@ -663,6 +663,14 @@ .parallel_ts = 1, }; +static struct zl10353_config cxusb_zl10353_xc3028_config_no_i2c_gate = { + .demod_address = 0x0f, + .if2 = 45600, + .no_tuner = 1, + .parallel_ts = 1, + .disable_i2c_gate_ctrl = 1, +}; + static struct mt352_config cxusb_mt352_xc3028_config = { .demod_address = 0x0f, .if2 = 4560, @@ -894,7 +902,7 @@ cxusb_bluebird_gpio_pulse(adap->dev, 0x02, 1); if ((adap->fe = dvb_attach(zl10353_attach, - &cxusb_zl10353_xc3028_config, + &cxusb_zl10353_xc3028_config_no_i2c_gate, &adap->dev->i2c_adap)) == NULL) return -EIO; --- linux-ec2-2.6.31.orig/drivers/media/dvb/dvb-core/dvb_net.c +++ linux-ec2-2.6.31/drivers/media/dvb/dvb-core/dvb_net.c @@ -503,6 +503,7 @@ "bytes left in TS. Resyncing.\n", ts_remain); priv->ule_sndu_len = 0; priv->need_pusi = 1; + ts += TS_SZ; continue; } --- linux-ec2-2.6.31.orig/drivers/media/radio/radio-gemtek-pci.c +++ linux-ec2-2.6.31/drivers/media/radio/radio-gemtek-pci.c @@ -181,12 +181,10 @@ static void gemtek_pci_unmute(struct gemtek_pci *card) { - mutex_lock(&card->lock); if (card->mute) { gemtek_pci_setfrequency(card, card->current_frequency); card->mute = false; } - mutex_unlock(&card->lock); } static int gemtek_pci_getsignal(struct gemtek_pci *card) --- linux-ec2-2.6.31.orig/drivers/char/vt_ioctl.c +++ linux-ec2-2.6.31/drivers/char/vt_ioctl.c @@ -36,6 +36,8 @@ #include #include +#define max_font_size 65536 + char vt_dont_switch; extern struct tty_driver *console_driver; @@ -1262,6 +1264,7 @@ static void complete_change_console(struct vc_data *vc) { unsigned char old_vc_mode; + struct vc_data *oldvc = vc_cons[fg_console].d; last_console = fg_console; @@ -1270,9 +1273,31 @@ * KD_TEXT mode or vice versa, which means we need to blank or * unblank the screen later. */ - old_vc_mode = vc_cons[fg_console].d->vc_mode; + old_vc_mode = oldvc->vc_mode; + +#if defined(CONFIG_VGA_CONSOLE) + if (old_vc_mode == KD_TEXT && oldvc->vc_sw == &vga_con && + oldvc->vc_sw->con_font_get) { + if (!oldvc->vc_font.data) + oldvc->vc_font.data = kmalloc(max_font_size, + GFP_KERNEL); + lock_kernel(); + oldvc->vc_sw->con_font_get(oldvc, &oldvc->vc_font); + unlock_kernel(); + } +#endif switch_screen(vc); +#if defined(CONFIG_VGA_CONSOLE) + if (vc->vc_mode == KD_TEXT && vc->vc_sw == &vga_con && + vc->vc_sw->con_font_set) { + if (vc->vc_font.data) { + lock_kernel(); + vc->vc_sw->con_font_set(vc, &vc->vc_font, 0); + unlock_kernel(); + } + } +#endif /* * This can't appear below a successful kill_pid(). If it did, * then the *blank_screen operation could occur while X, having --- linux-ec2-2.6.31.orig/drivers/char/tty_ldisc.c +++ linux-ec2-2.6.31/drivers/char/tty_ldisc.c @@ -516,7 +516,7 @@ static int tty_ldisc_halt(struct tty_struct *tty) { clear_bit(TTY_LDISC, &tty->flags); - return cancel_delayed_work(&tty->buf.work); + return cancel_delayed_work_sync(&tty->buf.work); } /** @@ -754,12 +754,9 @@ * N_TTY. 
*/ if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { - /* Make sure the old ldisc is quiescent */ - tty_ldisc_halt(tty); - flush_scheduled_work(); - /* Avoid racing set_ldisc or tty_ldisc_release */ mutex_lock(&tty->ldisc_mutex); + tty_ldisc_halt(tty); if (tty->ldisc) { /* Not yet closed */ /* Switch back to N_TTY */ tty_ldisc_reinit(tty); --- linux-ec2-2.6.31.orig/drivers/char/pty.c +++ linux-ec2-2.6.31/drivers/char/pty.c @@ -120,8 +120,10 @@ /* Stuff the data into the input queue of the other end */ c = tty_insert_flip_string(to, buf, c); /* And shovel */ - tty_flip_buffer_push(to); - tty_wakeup(tty); + if (c) { + tty_flip_buffer_push(to); + tty_wakeup(tty); + } } return c; } --- linux-ec2-2.6.31.orig/drivers/char/hvc_xen.c +++ linux-ec2-2.6.31/drivers/char/hvc_xen.c @@ -55,7 +55,7 @@ notify_remote_via_evtchn(xen_start_info->console.domU.evtchn); } -static int write_console(uint32_t vtermno, const char *data, int len) +static int __write_console(const char *data, int len) { struct xencons_interface *intf = xencons_interface(); XENCONS_RING_IDX cons, prod; @@ -76,6 +76,29 @@ return sent; } +static int write_console(uint32_t vtermno, const char *data, int len) +{ + int ret = len; + + /* + * Make sure the whole buffer is emitted, polling if + * necessary. We don't ever want to rely on the hvc daemon + * because the most interesting console output is when the + * kernel is crippled. + */ + while (len) { + int sent = __write_console(data, len); + + data += sent; + len -= sent; + + if (unlikely(len)) + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + } + + return ret; +} + static int read_console(uint32_t vtermno, char *buf, int len) { struct xencons_interface *intf = xencons_interface(); --- linux-ec2-2.6.31.orig/drivers/char/Kconfig +++ linux-ec2-2.6.31/drivers/char/Kconfig @@ -659,7 +659,7 @@ config HVC_XEN bool "Xen Hypervisor Console support" - depends on XEN + depends on PARAVIRT_XEN select HVC_DRIVER select HVC_IRQ default y @@ -1052,7 +1052,7 @@ config HPET bool "HPET - High Precision Event Timer" if (X86 || IA64) default n - depends on ACPI + depends on ACPI && !XEN help If you say Y here, you will have a miscdevice named "/dev/hpet/". Each open selects one of the timers supported by the HPET. The timers are --- linux-ec2-2.6.31.orig/drivers/char/keyboard.c +++ linux-ec2-2.6.31/drivers/char/keyboard.c @@ -1068,6 +1068,8 @@ int code; switch (keycode) { + case KEY_RESERVED: + break; case KEY_PAUSE: put_queue(vc, 0xe1); put_queue(vc, 0x1d | up_flag); @@ -1125,6 +1127,8 @@ static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag) { + if (keycode == KEY_RESERVED) + return 0; if (keycode > 127) return -1; @@ -1249,7 +1253,7 @@ if (keycode >= NR_KEYS) if (keycode >= KEY_BRL_DOT1 && keycode <= KEY_BRL_DOT8) - keysym = K(KT_BRL, keycode - KEY_BRL_DOT1 + 1); + keysym = U(K(KT_BRL, keycode - KEY_BRL_DOT1 + 1)); else return; else --- linux-ec2-2.6.31.orig/drivers/char/mem.c +++ linux-ec2-2.6.31/drivers/char/mem.c @@ -110,6 +110,7 @@ { } +#ifndef ARCH_HAS_DEV_MEM /* * This funcion reads the *physical* memory. The f_pos points directly to the * memory location. 
@@ -254,6 +255,7 @@ *ppos += written; return written; } +#endif int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) @@ -345,6 +347,9 @@ static int mmap_kmem(struct file * file, struct vm_area_struct * vma) { unsigned long pfn; +#ifdef CONFIG_XEN + unsigned long i, count; +#endif /* Turn a kernel-virtual address into a physical page frame */ pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; @@ -359,6 +364,13 @@ if (!pfn_valid(pfn)) return -EIO; +#ifdef CONFIG_XEN + count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + for (i = 0; i < count; i++) + if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i))) + return -EIO; +#endif + vma->vm_pgoff = pfn; return mmap_mem(file, vma); } @@ -774,6 +786,7 @@ #define open_kmem open_mem #define open_oldmem open_mem +#ifndef ARCH_HAS_DEV_MEM static const struct file_operations mem_fops = { .llseek = memory_lseek, .read = read_mem, @@ -782,6 +795,9 @@ .open = open_mem, .get_unmapped_area = get_unmapped_area_mem, }; +#else +extern const struct file_operations mem_fops; +#endif #ifdef CONFIG_DEVKMEM static const struct file_operations kmem_fops = { --- linux-ec2-2.6.31.orig/drivers/char/n_tty.c +++ linux-ec2-2.6.31/drivers/char/n_tty.c @@ -272,7 +272,8 @@ * * This is a helper function that handles one output character * (including special characters like TAB, CR, LF, etc.), - * putting the results in the tty driver's write buffer. + * doing OPOST processing and putting the results in the + * tty driver's write buffer. * * Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY * and NLDLY. They simply aren't relevant in the world today. @@ -350,8 +351,9 @@ * @c: character (or partial unicode symbol) * @tty: terminal device * - * Perform OPOST processing. Returns -1 when the output device is - * full and the character must be retried. + * Output one character with OPOST processing. + * Returns -1 when the output device is full and the character + * must be retried. * * Locking: output_lock to protect column state and space left * (also, this is called from n_tty_write under the @@ -377,8 +379,11 @@ /** * process_output_block - block post processor * @tty: terminal device - * @inbuf: user buffer - * @nr: number of bytes + * @buf: character buffer + * @nr: number of bytes to output + * + * Output a block of characters with OPOST processing. + * Returns the number of characters output. * * This path is used to speed up block console writes, among other * things when processing blocks of output data. It handles only @@ -571,33 +576,23 @@ break; default: - if (iscntrl(op)) { - if (L_ECHOCTL(tty)) { - /* - * Ensure there is enough space - * for the whole ctrl pair. - */ - if (space < 2) { - no_space_left = 1; - break; - } - tty_put_char(tty, '^'); - tty_put_char(tty, op ^ 0100); - tty->column += 2; - space -= 2; - } else { - if (!space) { - no_space_left = 1; - break; - } - tty_put_char(tty, op); - space--; - } - } /* - * If above falls through, this was an - * undefined op. + * If the op is not a special byte code, + * it is a ctrl char tagged to be echoed + * as "^X" (where X is the letter + * representing the control char). + * Note that we must ensure there is + * enough space for the whole ctrl pair. 
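+ *
+ * Example: Ctrl-C arrives as byte 0x03, and 0x03 ^ 0100 is 0x43,
+ * i.e. 'C', so the pair emitted below is the familiar "^C".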
+ * */ + if (space < 2) { + no_space_left = 1; + break; + } + tty_put_char(tty, '^'); + tty_put_char(tty, op ^ 0100); + tty->column += 2; + space -= 2; cp += 2; nr -= 2; } @@ -605,12 +600,18 @@ if (no_space_left) break; } else { - int retval; - - retval = do_output_char(c, tty, space); - if (retval < 0) - break; - space -= retval; + if (O_OPOST(tty) && + !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) { + int retval = do_output_char(c, tty, space); + if (retval < 0) + break; + space -= retval; + } else { + if (!space) + break; + tty_put_char(tty, c); + space -= 1; + } cp += 1; nr -= 1; } @@ -798,8 +799,8 @@ * Echo user input back onto the screen. This must be called only when * L_ECHO(tty) is true. Called from the driver receive_buf path. * - * This variant tags control characters to be possibly echoed as - * as "^X" (where X is the letter representing the control char). + * This variant tags control characters to be echoed as "^X" + * (where X is the letter representing the control char). * * Locking: echo_lock to protect the echo buffer */ @@ -812,7 +813,7 @@ add_echo_byte(ECHO_OP_START, tty); add_echo_byte(ECHO_OP_START, tty); } else { - if (iscntrl(c) && c != '\t') + if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t') add_echo_byte(ECHO_OP_START, tty); add_echo_byte(c, tty); } --- linux-ec2-2.6.31.orig/drivers/char/tty_io.c +++ linux-ec2-2.6.31/drivers/char/tty_io.c @@ -136,6 +136,8 @@ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +int console_use_vt = 1; + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -1184,6 +1186,7 @@ tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios); return 0; } +EXPORT_SYMBOL_GPL(tty_init_termios); /** * tty_driver_install_tty() - install a tty entry in the driver @@ -1404,6 +1407,8 @@ list_del_init(&tty->tty_files); file_list_unlock(); + put_pid(tty->pgrp); + put_pid(tty->session); free_tty_struct(tty); } @@ -1717,7 +1722,7 @@ goto got_driver; } #ifdef CONFIG_VT - if (device == MKDEV(TTY_MAJOR, 0)) { + if (console_use_vt && device == MKDEV(TTY_MAJOR, 0)) { extern struct tty_driver *console_driver; driver = tty_driver_kref_get(console_driver); index = fg_console; @@ -3091,7 +3096,8 @@ "console"); #ifdef CONFIG_VT - vty_init(&console_fops); + if (console_use_vt) + vty_init(&console_fops); #endif return 0; } --- linux-ec2-2.6.31.orig/drivers/char/tty_buffer.c +++ linux-ec2-2.6.31/drivers/char/tty_buffer.c @@ -402,28 +402,26 @@ container_of(work, struct tty_struct, buf.work.work); unsigned long flags; struct tty_ldisc *disc; - struct tty_buffer *tbuf, *head; - char *char_buf; - unsigned char *flag_buf; disc = tty_ldisc_ref(tty); if (disc == NULL) /* !TTY_LDISC */ return; spin_lock_irqsave(&tty->buf.lock, flags); - /* So we know a flush is running */ - set_bit(TTY_FLUSHING, &tty->flags); - head = tty->buf.head; - if (head != NULL) { - tty->buf.head = NULL; - for (;;) { - int count = head->commit - head->read; + + if (!test_and_set_bit(TTY_FLUSHING, &tty->flags)) { + struct tty_buffer *head; + while ((head = tty->buf.head) != NULL) { + int count; + char *char_buf; + unsigned char *flag_buf; + + count = head->commit - head->read; if (!count) { if (head->next == NULL) break; - tbuf = head; - head = head->next; - tty_buffer_free(tty, tbuf); + tty->buf.head = head->next; + tty_buffer_free(tty, head); continue; } /* Ldisc or user is trying to flush the buffers @@ -445,9 +443,9 @@ flag_buf, 
count); spin_lock_irqsave(&tty->buf.lock, flags); } - /* Restore the queue head */ - tty->buf.head = head; + clear_bit(TTY_FLUSHING, &tty->flags); } + /* We may have a deferred request to flush the input buffer, if so pull the chain under the lock and empty the queue */ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) { @@ -455,7 +453,6 @@ clear_bit(TTY_FLUSHPENDING, &tty->flags); wake_up(&tty->read_wait); } - clear_bit(TTY_FLUSHING, &tty->flags); spin_unlock_irqrestore(&tty->buf.lock, flags); tty_ldisc_deref(disc); --- linux-ec2-2.6.31.orig/drivers/char/tty_port.c +++ linux-ec2-2.6.31/drivers/char/tty_port.c @@ -96,6 +96,14 @@ } EXPORT_SYMBOL(tty_port_tty_set); +static void tty_port_shutdown(struct tty_port *port) +{ + if (port->ops->shutdown && + test_and_clear_bit(ASYNCB_INITIALIZED, &port->flags)) + port->ops->shutdown(port); + +} + /** * tty_port_hangup - hangup helper * @port: tty port @@ -116,6 +124,7 @@ port->tty = NULL; spin_unlock_irqrestore(&port->lock, flags); wake_up_interruptible(&port->open_wait); + tty_port_shutdown(port); } EXPORT_SYMBOL(tty_port_hangup); @@ -208,8 +217,14 @@ /* if non-blocking mode is set we can pass directly to open unless the port has just hung up or is in another error state */ - if ((filp->f_flags & O_NONBLOCK) || - (tty->flags & (1 << TTY_IO_ERROR))) { + if (tty->flags & (1 << TTY_IO_ERROR)) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + if (filp->f_flags & O_NONBLOCK) { + /* Indicate we are open */ + if (tty->termios->c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -296,15 +311,17 @@ if (port->count) { spin_unlock_irqrestore(&port->lock, flags); + if (port->ops->drop) + port->ops->drop(port); return 0; } - port->flags |= ASYNC_CLOSING; + set_bit(ASYNCB_CLOSING, &port->flags); tty->closing = 1; spin_unlock_irqrestore(&port->lock, flags); /* Don't block on a stalled port, just pull the chain */ if (tty->flow_stopped) tty_driver_flush_buffer(tty); - if (port->flags & ASYNC_INITIALIZED && + if (test_bit(ASYNCB_INITIALIZED, &port->flags) && port->closing_wait != ASYNC_CLOSING_WAIT_NONE) tty_wait_until_sent(tty, port->closing_wait); if (port->drain_delay) { @@ -318,6 +335,9 @@ timeout = 2 * HZ; schedule_timeout_interruptible(timeout); } + /* Don't call port->drop for the last reference. Callers will want + to drop the last active reference in ->shutdown() or the tty + shutdown path */ return 1; } EXPORT_SYMBOL(tty_port_close_start); @@ -348,3 +368,14 @@ spin_unlock_irqrestore(&port->lock, flags); } EXPORT_SYMBOL(tty_port_close_end); + +void tty_port_close(struct tty_port *port, struct tty_struct *tty, + struct file *filp) +{ + if (tty_port_close_start(port, tty, filp) == 0) + return; + tty_port_shutdown(port); + tty_port_close_end(port, tty); + tty_port_tty_set(port, NULL); +} +EXPORT_SYMBOL(tty_port_close); --- linux-ec2-2.6.31.orig/drivers/char/tpm/tpm_vtpm.c +++ linux-ec2-2.6.31/drivers/char/tpm/tpm_vtpm.c @@ -0,0 +1,542 @@ +/* + * Copyright (C) 2006 IBM Corporation + * + * Authors: + * Stefan Berger + * + * Generic device driver part for device drivers in a virtualized + * environment. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ * + */ + +#include <asm/uaccess.h> +#include <linux/list.h> +#include <linux/device.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include "tpm.h" +#include "tpm_vtpm.h" + +/* read status bits */ +enum { + STATUS_BUSY = 0x01, + STATUS_DATA_AVAIL = 0x02, + STATUS_READY = 0x04 +}; + +struct transmission { + struct list_head next; + + unsigned char *request; + size_t request_len; + size_t request_buflen; + + unsigned char *response; + size_t response_len; + size_t response_buflen; + + unsigned int flags; +}; + +enum { + TRANSMISSION_FLAG_WAS_QUEUED = 0x1 +}; + + +enum { + DATAEX_FLAG_QUEUED_ONLY = 0x1 +}; + + +/* local variables */ + +/* local function prototypes */ +static int _vtpm_send_queued(struct tpm_chip *chip); + + +/* ============================================================= + * Some utility functions + * ============================================================= + */ +static void vtpm_state_init(struct vtpm_state *vtpms) +{ + vtpms->current_request = NULL; + spin_lock_init(&vtpms->req_list_lock); + init_waitqueue_head(&vtpms->req_wait_queue); + INIT_LIST_HEAD(&vtpms->queued_requests); + + vtpms->current_response = NULL; + spin_lock_init(&vtpms->resp_list_lock); + init_waitqueue_head(&vtpms->resp_wait_queue); + + vtpms->disconnect_time = jiffies; +} + + +static inline struct transmission *transmission_alloc(void) +{ + return kzalloc(sizeof(struct transmission), GFP_ATOMIC); +} + +static unsigned char * +transmission_set_req_buffer(struct transmission *t, + unsigned char *buffer, size_t len) +{ + if (t->request_buflen < len) { + kfree(t->request); + t->request = kmalloc(len, GFP_KERNEL); + if (!t->request) { + t->request_buflen = 0; + return NULL; + } + t->request_buflen = len; + } + + memcpy(t->request, buffer, len); + t->request_len = len; + + return t->request; +} + +static unsigned char * +transmission_set_res_buffer(struct transmission *t, + const unsigned char *buffer, size_t len) +{ + if (t->response_buflen < len) { + kfree(t->response); + t->response = kmalloc(len, GFP_ATOMIC); + if (!t->response) { + t->response_buflen = 0; + return NULL; + } + t->response_buflen = len; + } + + memcpy(t->response, buffer, len); + t->response_len = len; + + return t->response; +} + +static inline void transmission_free(struct transmission *t) +{ + kfree(t->request); + kfree(t->response); + kfree(t); +} + +/* ============================================================= + * Interface with the lower layer driver + * ============================================================= + */ +/* + * Lower layer uses this function to make a response available. + */ +int vtpm_vd_recv(const struct tpm_chip *chip, + const unsigned char *buffer, size_t count, + void *ptr) +{ + unsigned long flags; + int ret_size = 0; + struct transmission *t; + struct vtpm_state *vtpms; + + vtpms = (struct vtpm_state *)chip_get_private(chip); + + /* + * The list with requests must contain one request + * only and the element there must be the one that + * was passed to me from the front-end.
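+ *
+ * That is, the pointer comparison below enforces a single
+ * outstanding request: a response is accepted only when "ptr" is
+ * the request we remember handing to the back-end; anything else
+ * is stale and gets dropped.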
+ */ + spin_lock_irqsave(&vtpms->resp_list_lock, flags); + if (vtpms->current_request != ptr) { + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + return 0; + } + + if ((t = vtpms->current_request)) { + transmission_free(t); + vtpms->current_request = NULL; + } + + t = transmission_alloc(); + if (t) { + if (!transmission_set_res_buffer(t, buffer, count)) { + transmission_free(t); + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + return -ENOMEM; + } + ret_size = count; + vtpms->current_response = t; + wake_up_interruptible(&vtpms->resp_wait_queue); + } + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + + return ret_size; +} + + +/* + * Lower layer indicates its status (connected/disconnected) + */ +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status) +{ + struct vtpm_state *vtpms; + + vtpms = (struct vtpm_state *)chip_get_private(chip); + + vtpms->vd_status = vd_status; + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { + vtpms->disconnect_time = jiffies; + } +} + +/* ============================================================= + * Interface with the generic TPM driver + * ============================================================= + */ +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + int rc = 0; + unsigned long flags; + struct vtpm_state *vtpms; + + vtpms = (struct vtpm_state *)chip_get_private(chip); + + /* + * Check if the previous operation only queued the command. + * In this case there won't be a response, so I just + * return from here and reset that flag. In any other + * case I should receive a response from the back-end. + */ + spin_lock_irqsave(&vtpms->resp_list_lock, flags); + if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) { + vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY; + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + /* + * The first few commands (measurements) must be + * queued since it might not be possible to talk to the + * TPM, yet. + * Return a response of up to 30 '0's. + */ + + count = min_t(size_t, count, 30); + memset(buf, 0x0, count); + return count; + } + /* + * Check whether something is in the response list and, if + * there's nothing in the list, wait for something to appear. + */ + + if (!vtpms->current_response) { + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + interruptible_sleep_on_timeout(&vtpms->resp_wait_queue, + 1000); + spin_lock_irqsave(&vtpms->resp_list_lock, flags); + } + + if (vtpms->current_response) { + struct transmission *t = vtpms->current_response; + vtpms->current_response = NULL; + rc = min(count, t->response_len); + memcpy(buf, t->response, rc); + transmission_free(t); + } + + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + return rc; +} + +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) +{ + int rc = 0; + unsigned long flags; + struct transmission *t = transmission_alloc(); + struct vtpm_state *vtpms; + + vtpms = (struct vtpm_state *)chip_get_private(chip); + + if (!t) + return -ENOMEM; + /* + * If there's a current request, it must be the + * previous request that has timed out.
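+ * The back-end never delivered a response for it, so the stale + * transmission is freed below before the new request takes its place.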
+ */ + spin_lock_irqsave(&vtpms->req_list_lock, flags); + if (vtpms->current_request != NULL) { + printk("WARNING: Sending although there is a request outstanding.\n" + " Previous request must have timed out.\n"); + transmission_free(vtpms->current_request); + vtpms->current_request = NULL; + } + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); + + /* + * Queue the packet if the driver below is not + * ready yet, or there is any packet already + * in the queue. + * If the driver below is ready, unqueue all + * packets first before sending our current + * packet. + * For each unqueued packet, except for the + * last (=current) packet, call the function + * vtpm_recv to wait for the response to come + * back. + */ + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) { + if (time_after(jiffies, + vtpms->disconnect_time + HZ * 10)) { + rc = -ENOENT; + } else { + goto queue_it; + } + } else { + /* + * Send all queued packets. + */ + if (_vtpm_send_queued(chip) == 0) { + + vtpms->current_request = t; + + rc = vtpm_vd_send(vtpms->tpm_private, + buf, + count, + t); + /* + * The generic TPM driver will call + * the function to receive the response. + */ + if (rc < 0) { + vtpms->current_request = NULL; + goto queue_it; + } + } else { +queue_it: + if (!transmission_set_req_buffer(t, buf, count)) { + transmission_free(t); + rc = -ENOMEM; + goto exit; + } + /* + * An error occurred. Don't even try + * to send the current request. Just + * queue it. + */ + spin_lock_irqsave(&vtpms->req_list_lock, flags); + vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY; + list_add_tail(&t->next, &vtpms->queued_requests); + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); + } + } + +exit: + return rc; +} + + +/* + * Send all queued requests. + */ +static int _vtpm_send_queued(struct tpm_chip *chip) +{ + int rc; + int error = 0; + unsigned long flags; + unsigned char buffer[1]; + struct vtpm_state *vtpms; + vtpms = (struct vtpm_state *)chip_get_private(chip); + + spin_lock_irqsave(&vtpms->req_list_lock, flags); + + while (!list_empty(&vtpms->queued_requests)) { + /* + * Need to dequeue them. + * Read the result into a dummy buffer. + */ + struct transmission *qt = (struct transmission *) + vtpms->queued_requests.next; + list_del(&qt->next); + vtpms->current_request = qt; + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); + + rc = vtpm_vd_send(vtpms->tpm_private, + qt->request, + qt->request_len, + qt); + + if (rc < 0) { + spin_lock_irqsave(&vtpms->req_list_lock, flags); + if ((qt = vtpms->current_request) != NULL) { + /* + * requeue it at the beginning + * of the list + */ + list_add(&qt->next, + &vtpms->queued_requests); + } + vtpms->current_request = NULL; + error = 1; + break; + } + /* + * After this point qt is not valid anymore!
+ * It is freed when the front-end delivers + * the data by calling vtpm_recv. + */ + /* + * Receive response into provided dummy buffer + */ + rc = vtpm_recv(chip, buffer, sizeof(buffer)); + spin_lock_irqsave(&vtpms->req_list_lock, flags); + } + + spin_unlock_irqrestore(&vtpms->req_list_lock, flags); + + return error; +} + +static void vtpm_cancel(struct tpm_chip *chip) +{ + unsigned long flags; + struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip); + + spin_lock_irqsave(&vtpms->resp_list_lock, flags); + + if (!vtpms->current_response && vtpms->current_request) { + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + interruptible_sleep_on(&vtpms->resp_wait_queue); + spin_lock_irqsave(&vtpms->resp_list_lock, flags); + } + + if (vtpms->current_response) { + struct transmission *t = vtpms->current_response; + vtpms->current_response = NULL; + transmission_free(t); + } + + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); +} + +static u8 vtpm_status(struct tpm_chip *chip) +{ + u8 rc = 0; + unsigned long flags; + struct vtpm_state *vtpms; + + vtpms = (struct vtpm_state *)chip_get_private(chip); + + spin_lock_irqsave(&vtpms->resp_list_lock, flags); + /* + * Data are available if: + * - there's a current response + * - the last packet was queued only (this is fake, but necessary to + * get the generic TPM layer to call the receive function.) + */ + if (vtpms->current_response || + 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) { + rc = STATUS_DATA_AVAIL; + } else if (!vtpms->current_response && !vtpms->current_request) { + rc = STATUS_READY; + } + + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags); + return rc; +} + +static struct file_operations vtpm_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, + NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); + +static struct attribute *vtpm_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_enabled.attr, + &dev_attr_active.attr, + &dev_attr_owned.attr, + &dev_attr_temp_deactivated.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + NULL, +}; + +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs }; + +#define TPM_LONG_TIMEOUT (10 * 60 * HZ) + +static struct tpm_vendor_specific tpm_vtpm = { + .recv = vtpm_recv, + .send = vtpm_send, + .cancel = vtpm_cancel, + .status = vtpm_status, + .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL, + .req_complete_val = STATUS_DATA_AVAIL, + .req_canceled = STATUS_READY, + .attr_group = &vtpm_attr_grp, + .miscdev = { + .fops = &vtpm_ops, + }, + .duration = { + TPM_LONG_TIMEOUT, + TPM_LONG_TIMEOUT, + TPM_LONG_TIMEOUT, + }, +}; + +struct tpm_chip *init_vtpm(struct device *dev, + struct tpm_private *tp) +{ + long rc; + struct tpm_chip *chip; + struct vtpm_state *vtpms; + + vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL); + if (!vtpms) + return ERR_PTR(-ENOMEM); + + vtpm_state_init(vtpms); + vtpms->tpm_private = tp; + + chip = tpm_register_hardware(dev, &tpm_vtpm); + if (!chip) { + rc = -ENODEV;
+ goto err_free_mem; + } + + chip_set_private(chip, vtpms); + + return chip; + +err_free_mem: + kfree(vtpms); + + return ERR_PTR(rc); +} + +void cleanup_vtpm(struct device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip); + tpm_remove_hardware(dev); + kfree(vtpms); +} --- linux-ec2-2.6.31.orig/drivers/char/tpm/tpm_vtpm.h +++ linux-ec2-2.6.31/drivers/char/tpm/tpm_vtpm.h @@ -0,0 +1,55 @@ +#ifndef TPM_VTPM_H +#define TPM_VTPM_H + +struct tpm_chip; +struct tpm_private; + +struct vtpm_state { + struct transmission *current_request; + spinlock_t req_list_lock; + wait_queue_head_t req_wait_queue; + + struct list_head queued_requests; + + struct transmission *current_response; + spinlock_t resp_list_lock; + wait_queue_head_t resp_wait_queue; // processes waiting for responses + + u8 vd_status; + u8 flags; + + unsigned long disconnect_time; + + /* + * The following is a private structure of the underlying + * driver. It is passed as parameter in the send function. + */ + struct tpm_private *tpm_private; +}; + + +enum vdev_status { + TPM_VD_STATUS_DISCONNECTED = 0x0, + TPM_VD_STATUS_CONNECTED = 0x1 +}; + +/* this function is called from tpm_vtpm.c */ +int vtpm_vd_send(struct tpm_private * tp, + const u8 * buf, size_t count, void *ptr); + +/* these functions are offered by tpm_vtpm.c */ +struct tpm_chip *init_vtpm(struct device *, + struct tpm_private *); +void cleanup_vtpm(struct device *); +int vtpm_vd_recv(const struct tpm_chip* chip, + const unsigned char *buffer, size_t count, void *ptr); +void vtpm_vd_status(const struct tpm_chip *, u8 status); + +static inline struct tpm_private *tpm_private_from_dev(struct device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + struct vtpm_state *vtpms = chip_get_private(chip); + return vtpms->tpm_private; +} + +#endif --- linux-ec2-2.6.31.orig/drivers/char/tpm/tpm.c +++ linux-ec2-2.6.31/drivers/char/tpm/tpm.c @@ -696,8 +696,7 @@ cmd.header.in = pcrread_header; cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx); - BUILD_BUG_ON(cmd.header.in.length > READ_PCR_RESULT_SIZE); - rc = transmit_cmd(chip, &cmd, cmd.header.in.length, + rc = transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE, "attempting to read a pcr value"); if (rc == 0) @@ -742,7 +741,7 @@ * the module usage count. */ #define TPM_ORD_PCR_EXTEND cpu_to_be32(20) -#define EXTEND_PCR_SIZE 34 +#define EXTEND_PCR_RESULT_SIZE 34 static struct tpm_input_header pcrextend_header = { .tag = TPM_TAG_RQU_COMMAND, .length = cpu_to_be32(34), @@ -760,10 +759,9 @@ return -ENODEV; cmd.header.in = pcrextend_header; - BUILD_BUG_ON(be32_to_cpu(cmd.header.in.length) > EXTEND_PCR_SIZE); cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx); memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE); - rc = transmit_cmd(chip, &cmd, cmd.header.in.length, + rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, "attempting extend a PCR value"); module_put(chip->dev->driver->owner); --- linux-ec2-2.6.31.orig/drivers/char/tpm/Kconfig +++ linux-ec2-2.6.31/drivers/char/tpm/Kconfig @@ -58,4 +58,13 @@ Further information on this driver and the supported hardware can be found at http://www.prosec.rub.de/tpm +config TCG_XEN + tristate "XEN TPM Interface" + depends on XEN + ---help--- + If you want to make TPM support available to a Xen user domain, + say Yes and it will be accessible from within Linux. + To compile this driver as a module, choose M here; the module + will be called tpm_xenu. 
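+ + If unsure, say N.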
+ endif # TCG_TPM --- linux-ec2-2.6.31.orig/drivers/char/tpm/tpm_tis.c +++ linux-ec2-2.6.31/drivers/char/tpm/tpm_tis.c @@ -450,6 +450,12 @@ goto out_err; } + /* Default timeouts */ + chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT); + chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT); + chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT); + chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT); + if (request_locality(chip, 0) != 0) { rc = -ENODEV; goto out_err; @@ -457,12 +463,6 @@ vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0)); - /* Default timeouts */ - chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT); - chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT); - chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT); - chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT); - dev_info(dev, "1.2 TPM (device-id 0x%X, rev-id %d)\n", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); --- linux-ec2-2.6.31.orig/drivers/char/tpm/tpm.h +++ linux-ec2-2.6.31/drivers/char/tpm/tpm.h @@ -108,6 +108,9 @@ struct dentry **bios_dir; struct list_head list; +#ifdef CONFIG_XEN + void *priv; +#endif void (*release) (struct device *); }; @@ -266,6 +269,18 @@ ssize_t tpm_getcap(struct device *, __be32, cap_t *, const char *); +#ifdef CONFIG_XEN +static inline void *chip_get_private(const struct tpm_chip *chip) +{ + return chip->priv; +} + +static inline void chip_set_private(struct tpm_chip *chip, void *priv) +{ + chip->priv = priv; +} +#endif + extern void tpm_get_timeouts(struct tpm_chip *); extern void tpm_gen_interrupt(struct tpm_chip *); extern void tpm_continue_selftest(struct tpm_chip *); --- linux-ec2-2.6.31.orig/drivers/char/tpm/Makefile +++ linux-ec2-2.6.31/drivers/char/tpm/Makefile @@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_NSC) += tpm_nsc.o obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o +tpm_xenu-y = tpm_xen.o tpm_vtpm.o --- linux-ec2-2.6.31.orig/drivers/char/tpm/tpm_xen.c +++ linux-ec2-2.6.31/drivers/char/tpm/tpm_xen.c @@ -0,0 +1,720 @@ +/* + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@us.ibm.com + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from drivers/xen/netfront/netfront.c + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <asm/uaccess.h> +#include <xen/evtchn.h> +#include <xen/interface/grant_table.h> +#include <xen/interface/io/tpmif.h> +#include <xen/gnttab.h> +#include <xen/xenbus.h> +#include "tpm.h" +#include "tpm_vtpm.h" + +#undef DEBUG + +/* local structures */ +struct tpm_private { + struct tpm_chip *chip; + + tpmif_tx_interface_t *tx; + atomic_t refcnt; + unsigned int irq; + u8 is_connected; + u8 is_suspended; + + spinlock_t tx_lock; + + struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE]; + + atomic_t tx_busy; + void *tx_remember; + + domid_t backend_id; + wait_queue_head_t wait_q; + + struct xenbus_device *dev; + int ring_ref; +}; + +struct tx_buffer { + unsigned int size; // available space in data + unsigned int len; // used space in data + unsigned char *data; // pointer to a page +}; + + +/* locally visible variables */ +static grant_ref_t gref_head; +static struct tpm_private *my_priv; + +/* local function prototypes */ +static irqreturn_t tpmif_int(int irq, + void *tpm_priv); +static void tpmif_rx_action(unsigned long unused); +static int tpmif_connect(struct xenbus_device *dev, + struct tpm_private *tp, + domid_t domid); +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0); +static int tpmif_allocate_tx_buffers(struct tpm_private *tp); +static void tpmif_free_tx_buffers(struct tpm_private *tp); +static void tpmif_set_connected_state(struct tpm_private *tp, + u8 newstate); +static int tpm_xmit(struct tpm_private *tp, + const u8 * buf, size_t count, int userbuffer, + void *remember); +static void destroy_tpmring(struct tpm_private *tp); +void __exit tpmif_exit(void); + +#define DPRINTK(fmt, args...) \ + pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args) +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_tpm_fr: " fmt, ##args) +#define WPRINTK(fmt, args...)
\ + printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args) + +#define GRANT_INVALID_REF 0 + + +static inline int +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len, + int isuserbuffer) +{ + int copied = len; + + if (len > txb->size) + copied = txb->size; + if (isuserbuffer) { + if (copy_from_user(txb->data, src, copied)) + return -EFAULT; + } else { + memcpy(txb->data, src, copied); + } + txb->len = len; + return copied; +} + +static inline struct tx_buffer *tx_buffer_alloc(void) +{ + struct tx_buffer *txb; + + txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL); + if (!txb) + return NULL; + + txb->len = 0; + txb->size = PAGE_SIZE; + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL); + if (txb->data == NULL) { + kfree(txb); + txb = NULL; + } + + return txb; +} + + +static inline void tx_buffer_free(struct tx_buffer *txb) +{ + if (txb) { + free_page((long)txb->data); + kfree(txb); + } +} + +/************************************************************** + Utility function for the tpm_private structure +**************************************************************/ +static void tpm_private_init(struct tpm_private *tp) +{ + spin_lock_init(&tp->tx_lock); + init_waitqueue_head(&tp->wait_q); + atomic_set(&tp->refcnt, 1); +} + +static void tpm_private_put(void) +{ + if (!atomic_dec_and_test(&my_priv->refcnt)) + return; + + tpmif_free_tx_buffers(my_priv); + kfree(my_priv); + my_priv = NULL; +} + +static struct tpm_private *tpm_private_get(void) +{ + int err; + + if (my_priv) { + atomic_inc(&my_priv->refcnt); + return my_priv; + } + + my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL); + if (!my_priv) + return NULL; + + tpm_private_init(my_priv); + err = tpmif_allocate_tx_buffers(my_priv); + if (err < 0) + tpm_private_put(); + + return my_priv; +} + +/************************************************************** + + The interface to let the tpm plugin register its callback + function and send data to another partition using this module + +**************************************************************/ + +static DEFINE_MUTEX(suspend_lock); +/* + * Send data via this module by calling this function + */ +int vtpm_vd_send(struct tpm_private *tp, + const u8 * buf, size_t count, void *ptr) +{ + int sent; + + mutex_lock(&suspend_lock); + sent = tpm_xmit(tp, buf, count, 0, ptr); + mutex_unlock(&suspend_lock); + + return sent; +} + +/************************************************************** + XENBUS support code +**************************************************************/ + +static int setup_tpmring(struct xenbus_device *dev, + struct tpm_private *tp) +{ + tpmif_tx_interface_t *sring; + int err; + + tp->ring_ref = GRANT_INVALID_REF; + + sring = (void *)__get_free_page(GFP_KERNEL); + if (!sring) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + tp->tx = sring; + + err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx)); + if (err < 0) { + free_page((unsigned long)sring); + tp->tx = NULL; + xenbus_dev_fatal(dev, err, "allocating grant reference"); + goto fail; + } + tp->ring_ref = err; + + err = tpmif_connect(dev, tp, dev->otherend_id); + if (err) + goto fail; + + return 0; +fail: + destroy_tpmring(tp); + return err; +} + + +static void destroy_tpmring(struct tpm_private *tp) +{ + tpmif_set_connected_state(tp, 0); + + if (tp->ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx); + tp->ring_ref = GRANT_INVALID_REF; + tp->tx = NULL; + } + + if (tp->irq) + unbind_from_irqhandler(tp->irq, tp); + + tp->irq = 0; 
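+ + /* The ring is fully torn down at this point; tpmfront_resume() rebuilds it from scratch via talk_to_backend(). */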
+} + + +static int talk_to_backend(struct xenbus_device *dev, + struct tpm_private *tp) +{ + const char *message = NULL; + int err; + struct xenbus_transaction xbt; + + err = setup_tpmring(dev, tp); + if (err) { + xenbus_dev_fatal(dev, err, "setting up ring"); + goto out; + } + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_tpmring; + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-ref","%u", tp->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + irq_to_evtchn_port(tp->irq)); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) { + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_tpmring; + } + + xenbus_switch_state(dev, XenbusStateConnected); + + return 0; + +abort_transaction: + xenbus_transaction_end(xbt, 1); + if (message) + xenbus_dev_error(dev, err, "%s", message); +destroy_tpmring: + destroy_tpmring(tp); +out: + return err; +} + +/** + * Callback received when the backend's state changes. + */ +static void backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); + DPRINTK("\n"); + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateReconfiguring: + case XenbusStateReconfigured: + case XenbusStateUnknown: + break; + + case XenbusStateConnected: + tpmif_set_connected_state(tp, 1); + break; + + case XenbusStateClosing: + tpmif_set_connected_state(tp, 0); + xenbus_frontend_closed(dev); + break; + + case XenbusStateClosed: + tpmif_set_connected_state(tp, 0); + if (tp->is_suspended == 0) + device_unregister(&dev->dev); + xenbus_frontend_closed(dev); + break; + } +} + +static int tpmfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + int handle; + struct tpm_private *tp = tpm_private_get(); + + if (!tp) + return -ENOMEM; + + tp->chip = init_vtpm(&dev->dev, tp); + if (IS_ERR(tp->chip)) + return PTR_ERR(tp->chip); + + err = xenbus_scanf(XBT_NIL, dev->nodename, + "handle", "%i", &handle); + if (XENBUS_EXIST_ERR(err)) + return err; + + if (err < 0) { + xenbus_dev_fatal(dev,err,"reading virtual-device"); + return err; + } + + tp->dev = dev; + + err = talk_to_backend(dev, tp); + if (err) { + tpm_private_put(); + return err; + } + + return 0; +} + + +static int tpmfront_remove(struct xenbus_device *dev) +{ + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); + destroy_tpmring(tp); + cleanup_vtpm(&dev->dev); + return 0; +} + +static int tpmfront_suspend(struct xenbus_device *dev) +{ + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); + u32 ctr; + + /* Take the lock, preventing any application from sending. */ + mutex_lock(&suspend_lock); + tp->is_suspended = 1; + + for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) { + if ((ctr % 10) == 0) + printk("TPM-FE [INFO]: Waiting for outstanding " + "request.\n"); + /* Wait for a request to be responded to. */ + interruptible_sleep_on_timeout(&tp->wait_q, 100); + } + + return 0; +} + +static int tpmfront_suspend_finish(struct tpm_private *tp) +{ + tp->is_suspended = 0; + /* Allow applications to send again. 
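+ * vtpm_vd_send() takes suspend_lock around tpm_xmit(), so any sender + * that blocked while the domain was suspended proceeds as soon as the + * mutex is released below.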
*/ + mutex_unlock(&suspend_lock); + return 0; +} + +static int tpmfront_suspend_cancel(struct xenbus_device *dev) +{ + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); + return tpmfront_suspend_finish(tp); +} + +static int tpmfront_resume(struct xenbus_device *dev) +{ + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); + destroy_tpmring(tp); + return talk_to_backend(dev, tp); +} + +static int tpmif_connect(struct xenbus_device *dev, + struct tpm_private *tp, + domid_t domid) +{ + int err; + + tp->backend_id = domid; + + err = bind_listening_port_to_irqhandler( + domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp); + if (err <= 0) { + WPRINTK("bind_listening_port_to_irqhandler failed " + "(err=%d)\n", err); + return err; + } + tp->irq = err; + + return 0; +} + +static struct xenbus_device_id tpmfront_ids[] = { + { "vtpm" }, + { "" } +}; + +static struct xenbus_driver tpmfront = { + .name = "vtpm", + .ids = tpmfront_ids, + .probe = tpmfront_probe, + .remove = tpmfront_remove, + .resume = tpmfront_resume, + .otherend_changed = backend_changed, + .suspend = tpmfront_suspend, + .suspend_cancel = tpmfront_suspend_cancel, +}; + +static int __init init_tpm_xenbus(void) +{ + return xenbus_register_frontend(&tpmfront); +} + +static int tpmif_allocate_tx_buffers(struct tpm_private *tp) +{ + unsigned int i; + + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) { + tp->tx_buffers[i] = tx_buffer_alloc(); + if (!tp->tx_buffers[i]) { + tpmif_free_tx_buffers(tp); + return -ENOMEM; + } + } + return 0; +} + +static void tpmif_free_tx_buffers(struct tpm_private *tp) +{ + unsigned int i; + + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) + tx_buffer_free(tp->tx_buffers[i]); +} + +static void tpmif_rx_action(unsigned long priv) +{ + struct tpm_private *tp = (struct tpm_private *)priv; + int i = 0; + unsigned int received; + unsigned int offset = 0; + u8 *buffer; + tpmif_tx_request_t *tx = &tp->tx->ring[i].req; + + atomic_set(&tp->tx_busy, 0); + wake_up_interruptible(&tp->wait_q); + + received = tx->size; + + buffer = kmalloc(received, GFP_ATOMIC); + if (!buffer) + return; + + for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) { + struct tx_buffer *txb = tp->tx_buffers[i]; + tpmif_tx_request_t *tx; + unsigned int tocopy; + + tx = &tp->tx->ring[i].req; + tocopy = tx->size; + if (tocopy > PAGE_SIZE) + tocopy = PAGE_SIZE; + + memcpy(&buffer[offset], txb->data, tocopy); + + gnttab_release_grant_reference(&gref_head, tx->ref); + + offset += tocopy; + } + + vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember); + kfree(buffer); +} + + +static irqreturn_t tpmif_int(int irq, void *tpm_priv) +{ + struct tpm_private *tp = tpm_priv; + unsigned long flags; + + spin_lock_irqsave(&tp->tx_lock, flags); + tpmif_rx_tasklet.data = (unsigned long)tp; + tasklet_schedule(&tpmif_rx_tasklet); + spin_unlock_irqrestore(&tp->tx_lock, flags); + + return IRQ_HANDLED; +} + + +static int tpm_xmit(struct tpm_private *tp, + const u8 * buf, size_t count, int isuserbuffer, + void *remember) +{ + tpmif_tx_request_t *tx; + TPMIF_RING_IDX i; + unsigned int offset = 0; + + spin_lock_irq(&tp->tx_lock); + + if (unlikely(atomic_read(&tp->tx_busy))) { + printk("tpm_xmit: There's an outstanding request/response " + "on the way!\n"); + spin_unlock_irq(&tp->tx_lock); + return -EBUSY; + } + + if (tp->is_connected != 1) { + spin_unlock_irq(&tp->tx_lock); + return -EIO; + } + + for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) { + struct tx_buffer *txb = tp->tx_buffers[i]; + int copied; + + if (!txb) { + DPRINTK("txb (i=%d) is NULL. 
buffers initialized?\n" + "Not transmitting anything!\n", i); + spin_unlock_irq(&tp->tx_lock); + return -EFAULT; + } + + copied = tx_buffer_copy(txb, &buf[offset], count, + isuserbuffer); + if (copied < 0) { + /* An error occurred */ + spin_unlock_irq(&tp->tx_lock); + return copied; + } + count -= copied; + offset += copied; + + tx = &tp->tx->ring[i].req; + tx->addr = virt_to_machine(txb->data); + tx->size = txb->len; + tx->unused = 0; + + DPRINTK("First 4 characters sent by TPM-FE are " + "0x%02x 0x%02x 0x%02x 0x%02x\n", + txb->data[0],txb->data[1],txb->data[2],txb->data[3]); + + /* Get the grant table reference for this page. */ + tx->ref = gnttab_claim_grant_reference(&gref_head); + if (tx->ref == -ENOSPC) { + spin_unlock_irq(&tp->tx_lock); + DPRINTK("Grant table claim reference failed in " + "func:%s line:%d file:%s\n", + __FUNCTION__, __LINE__, __FILE__); + return -ENOSPC; + } + gnttab_grant_foreign_access_ref(tx->ref, + tp->backend_id, + virt_to_mfn(txb->data), + 0 /*RW*/); + wmb(); + } + + atomic_set(&tp->tx_busy, 1); + tp->tx_remember = remember; + + mb(); + + notify_remote_via_irq(tp->irq); + + spin_unlock_irq(&tp->tx_lock); + return offset; +} + + +static void tpmif_notify_upperlayer(struct tpm_private *tp) +{ + /* Notify upper layer about the state of the connection to the BE. */ + vtpm_vd_status(tp->chip, (tp->is_connected + ? TPM_VD_STATUS_CONNECTED + : TPM_VD_STATUS_DISCONNECTED)); +} + + +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected) +{ + /* + * Don't notify upper layer if we are in suspend mode and + * should disconnect - assumption is that we will resume. + * The mutex keeps apps from sending. + */ + if (is_connected == 0 && tp->is_suspended == 1) + return; + + /* + * Unlock the mutex if we are connected again + * after being suspended - now resuming. + * This also removes the suspend state. + */ + if (is_connected == 1 && tp->is_suspended == 1) + tpmfront_suspend_finish(tp); + + if (is_connected != tp->is_connected) { + tp->is_connected = is_connected; + tpmif_notify_upperlayer(tp); + } +} + + + +/* ================================================================= + * Initialization function.
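+ * The frontend only runs in unprivileged guests; tpmif_init() below + * returns -EPERM when loaded in the initial domain.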
+ * ================================================================= + */ + + +static int __init tpmif_init(void) +{ + struct tpm_private *tp; + + if (is_initial_xendomain()) + return -EPERM; + + tp = tpm_private_get(); + if (!tp) + return -ENOMEM; + + IPRINTK("Initialising the vTPM driver.\n"); + if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE, + &gref_head) < 0) { + tpm_private_put(); + return -EFAULT; + } + + init_tpm_xenbus(); + return 0; +} + + +module_init(tpmif_init); + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/char/agp/backend.c +++ linux-ec2-2.6.31/drivers/char/agp/backend.c @@ -149,7 +149,7 @@ return -ENOMEM; } - bridge->scratch_page_real = phys_to_gart(page_to_phys(page)); + bridge->scratch_page_real = page_to_gart(page); bridge->scratch_page = bridge->driver->mask_memory(bridge, page, 0); } --- linux-ec2-2.6.31.orig/drivers/char/agp/parisc-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/parisc-agp.c @@ -199,7 +199,7 @@ parisc_agp_page_mask_memory(struct agp_bridge_data *bridge, struct page *page, int type) { - unsigned long addr = phys_to_gart(page_to_phys(page)); + unsigned long addr = page_to_gart(page); return SBA_PDIR_VALID_BIT | addr; } --- linux-ec2-2.6.31.orig/drivers/char/agp/intel-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/intel-agp.c @@ -36,6 +36,8 @@ #define PCI_DEVICE_ID_INTEL_Q35_IG 0x29B2 #define PCI_DEVICE_ID_INTEL_Q33_HB 0x29D0 #define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2 +#define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40 +#define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42 #define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40 #define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42 #define PCI_DEVICE_ID_INTEL_IGD_E_HB 0x2E00 @@ -50,6 +52,7 @@ #define PCI_DEVICE_ID_INTEL_IGDNG_D_IG 0x0042 #define PCI_DEVICE_ID_INTEL_IGDNG_M_HB 0x0044 #define PCI_DEVICE_ID_INTEL_IGDNG_MA_HB 0x0062 +#define PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB 0x006a #define PCI_DEVICE_ID_INTEL_IGDNG_M_IG 0x0046 /* cover 915 and 945 variants */ @@ -81,9 +84,11 @@ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \ - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB) + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB) extern int agp_memory_reserved; @@ -267,8 +272,19 @@ if (page == NULL) return NULL; +#ifdef CONFIG_XEN + if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) { + __free_pages(page, 2); + return NULL; + } +#endif + if (set_pages_uc(page, 4) < 0) { set_pages_wb(page, 4); +#ifdef CONFIG_XEN + xen_destroy_contiguous_region((unsigned long)page_address(page), + 2); +#endif __free_pages(page, 2); return NULL; } @@ -283,6 +299,9 @@ return; set_pages_wb(page, 4); +#ifdef CONFIG_XEN + xen_destroy_contiguous_region((unsigned long)page_address(page), 2); +#endif put_page(page); __free_pages(page, 2); atomic_dec(&agp_bridge->current_memory_agp); @@ -418,7 +437,11 @@ new->page_count = pg_count; new->num_scratch_pages = pg_count; new->type = AGP_PHYS_MEMORY; +#ifndef CONFIG_XEN new->physical = page_to_phys(new->pages[0]); +#else + new->physical = page_to_pseudophys(new->pages[0]); +#endif return new; } @@ -465,7 +488,7 @@ static unsigned long intel_i810_mask_memory(struct agp_bridge_data *bridge, struct 
page *page, int type) { - unsigned long addr = phys_to_gart(page_to_phys(page)); + unsigned long addr = page_to_gart(page); /* Type checking must be done elsewhere */ return addr | bridge->driver->masks[type].mask; } @@ -679,23 +702,39 @@ if (!intel_private.i8xx_page) return; - /* make page uncached */ - map_page_into_agp(intel_private.i8xx_page); - intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page); if (!intel_private.i8xx_flush_page) intel_i830_fini_flush(); } +static void +do_wbinvd(void *null) +{ + wbinvd(); +} + +/* The chipset_flush interface needs to get data that has already been + * flushed out of the CPU all the way out to main memory, because the GPU + * doesn't snoop those buffers. + * + * The 8xx series doesn't have the same lovely interface for flushing the + * chipset write buffers that the later chips do. According to the 865 + * specs, it's 64 octwords, or 1KB. So, to get those previous things in + * that buffer out, we just fill 1KB and clflush it out, on the assumption + * that it'll push whatever was in there out. It appears to work. + */ static void intel_i830_chipset_flush(struct agp_bridge_data *bridge) { unsigned int *pg = intel_private.i8xx_flush_page; - int i; - for (i = 0; i < 256; i += 2) - *(pg + i) = i; + memset(pg, 0, 1024); - wmb(); + if (cpu_has_clflush) { + clflush_cache_range(pg, 1024); + } else { + if (on_each_cpu(do_wbinvd, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for cache flush.\n"); + } } /* The intel i830 automatically initializes the agp aperture during POST. @@ -1200,7 +1239,7 @@ static unsigned long intel_i965_mask_memory(struct agp_bridge_data *bridge, struct page *page, int type) { - dma_addr_t addr = phys_to_gart(page_to_phys(page)); + dma_addr_t addr = page_to_gart(page); /* Shift high bits down */ addr |= (addr >> 28) & 0xf0; @@ -1216,9 +1255,11 @@ case PCI_DEVICE_ID_INTEL_Q45_HB: case PCI_DEVICE_ID_INTEL_G45_HB: case PCI_DEVICE_ID_INTEL_G41_HB: + case PCI_DEVICE_ID_INTEL_B43_HB: case PCI_DEVICE_ID_INTEL_IGDNG_D_HB: case PCI_DEVICE_ID_INTEL_IGDNG_M_HB: case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB: + case PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB: *gtt_offset = *gtt_size = MB(2); break; default: @@ -2192,6 +2233,8 @@ "Q45/Q43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0, "G45/G43", NULL, &intel_i965_driver }, + { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0, + "B43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0, "G41", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0, @@ -2200,6 +2243,8 @@ "IGDNG/M", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IGDNG_MA_HB, PCI_DEVICE_ID_INTEL_IGDNG_M_IG, 0, "IGDNG/MA", NULL, &intel_i965_driver }, + { PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB, PCI_DEVICE_ID_INTEL_IGDNG_M_IG, 0, + "IGDNG/MC2", NULL, &intel_i965_driver }, { 0, 0, 0, NULL, NULL, NULL } }; @@ -2313,15 +2358,6 @@ struct agp_bridge_data *bridge = pci_get_drvdata(pdev); int ret_val; - pci_restore_state(pdev); - - /* We should restore our graphics device's config space, - * as host bridge (00:00) resumes before graphics device (02:00), - * then our access to its pci space can work right. 
- */ - if (intel_private.pcidev) - pci_restore_state(intel_private.pcidev); - if (bridge->driver == &intel_generic_driver) intel_configure(); else if (bridge->driver == &intel_850_driver) @@ -2401,9 +2437,11 @@ ID(PCI_DEVICE_ID_INTEL_Q45_HB), ID(PCI_DEVICE_ID_INTEL_G45_HB), ID(PCI_DEVICE_ID_INTEL_G41_HB), + ID(PCI_DEVICE_ID_INTEL_B43_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB), + ID(PCI_DEVICE_ID_INTEL_IGDNG_MC2_HB), { } }; @@ -2434,5 +2472,6 @@ module_init(agp_intel_init); module_exit(agp_intel_cleanup); +MODULE_EXPORT(intel_agp); MODULE_AUTHOR("Dave Jones "); MODULE_LICENSE("GPL and additional rights"); --- linux-ec2-2.6.31.orig/drivers/char/agp/ali-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/ali-agp.c @@ -152,7 +152,7 @@ pci_read_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, &temp); pci_write_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, (((temp & ALI_CACHE_FLUSH_ADDR_MASK) | - phys_to_gart(page_to_phys(page))) | ALI_CACHE_FLUSH_EN )); + page_to_gart(page)) | ALI_CACHE_FLUSH_EN )); return page; } @@ -180,7 +180,7 @@ pci_read_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, &temp); pci_write_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, (((temp & ALI_CACHE_FLUSH_ADDR_MASK) | - phys_to_gart(page_to_phys(page))) | ALI_CACHE_FLUSH_EN)); + page_to_gart(page)) | ALI_CACHE_FLUSH_EN)); } agp_generic_destroy_page(page, flags); } --- linux-ec2-2.6.31.orig/drivers/char/agp/uninorth-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/uninorth-agp.c @@ -174,8 +174,8 @@ for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { agp_bridge->gatt_table[j] = cpu_to_le32((page_to_phys(mem->pages[i]) & 0xFFFFF000UL) | 0x1UL); - flush_dcache_range((unsigned long)__va(page_to_phys(mem->pages[i])), - (unsigned long)__va(page_to_phys(mem->pages[i]))+0x1000); + flush_dcache_range((unsigned long)page_address(mem->pages[i]), + (unsigned long)page_address(mem->pages[i])+0x1000); } (void)in_le32((volatile u32*)&agp_bridge->gatt_table[pg_start]); mb(); @@ -220,8 +220,8 @@ for (i = 0; i < mem->page_count; i++) { gp[i] = (page_to_phys(mem->pages[i]) >> PAGE_SHIFT) | 0x80000000UL; - flush_dcache_range((unsigned long)__va(page_to_phys(mem->pages[i])), - (unsigned long)__va(page_to_phys(mem->pages[i]))+0x1000); + flush_dcache_range((unsigned long)page_address(mem->pages[i]), + (unsigned long)page_address(mem->pages[i])+0x1000); } mb(); flush_dcache_range((unsigned long)gp, (unsigned long) &gp[i]); --- linux-ec2-2.6.31.orig/drivers/char/agp/sgi-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/sgi-agp.c @@ -73,7 +73,7 @@ sgi_tioca_mask_memory(struct agp_bridge_data *bridge, struct page *page, int type) { - unsigned long addr = phys_to_gart(page_to_phys(page)); + unsigned long addr = page_to_gart(page); return tioca_physpage_to_gart(addr); } --- linux-ec2-2.6.31.orig/drivers/char/agp/efficeon-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/efficeon-agp.c @@ -67,7 +67,7 @@ /* This function does the same thing as mask_memory() for this chipset... 
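* (the OR with 0x00000001 presumably marks the GATT entry valid)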
*/ static inline unsigned long efficeon_mask_memory(struct page *page) { - unsigned long addr = phys_to_gart(page_to_phys(page)); + unsigned long addr = page_to_gart(page); return addr | 0x00000001; } --- linux-ec2-2.6.31.orig/drivers/char/agp/generic.c +++ linux-ec2-2.6.31/drivers/char/agp/generic.c @@ -1349,7 +1349,7 @@ unsigned long agp_generic_mask_memory(struct agp_bridge_data *bridge, struct page *page, int type) { - unsigned long addr = phys_to_gart(page_to_phys(page)); + unsigned long addr = page_to_gart(page); /* memory type is ignored in the generic routine */ if (bridge->driver->masks) return addr | bridge->driver->masks[0].mask; --- linux-ec2-2.6.31.orig/drivers/char/agp/hp-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/hp-agp.c @@ -397,7 +397,7 @@ hp_zx1_mask_memory (struct agp_bridge_data *bridge, struct page *page, int type) { - unsigned long addr = phys_to_gart(page_to_phys(page)); + unsigned long addr = page_to_gart(page); return HP_ZX1_PDIR_VALID_BIT | addr; } --- linux-ec2-2.6.31.orig/drivers/char/agp/i460-agp.c +++ linux-ec2-2.6.31/drivers/char/agp/i460-agp.c @@ -325,7 +325,7 @@ io_page_size = 1UL << I460_IO_PAGE_SHIFT; for (i = 0, j = io_pg_start; i < mem->page_count; i++) { - paddr = phys_to_gart(page_to_phys(mem->pages[i])); + paddr = page_to_gart(mem->pages[i]); for (k = 0; k < I460_IOPAGES_PER_KPAGE; k++, j++, paddr += io_page_size) WR_GATT(j, i460_mask_memory(agp_bridge, paddr, mem->type)); } @@ -382,7 +382,7 @@ return -ENOMEM; } - lp->paddr = phys_to_gart(page_to_phys(lp->page)); + lp->paddr = page_to_gart(lp->page); lp->refcount = 0; atomic_add(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp); return 0; @@ -556,7 +556,7 @@ static unsigned long i460_page_mask_memory(struct agp_bridge_data *bridge, struct page *page, int type) { - unsigned long addr = phys_to_gart(page_to_phys(page)); + unsigned long addr = page_to_gart(page); return i460_mask_memory(bridge, addr, type); } --- linux-ec2-2.6.31.orig/drivers/char/agp/agp.h +++ linux-ec2-2.6.31/drivers/char/agp/agp.h @@ -31,6 +31,10 @@ #include /* for flush_agp_cache() */ +#ifndef page_to_gart +#define page_to_gart(x) phys_to_gart(page_to_phys(x)) +#endif + #define PFX "agpgart: " //#define AGP_DEBUG 1 --- linux-ec2-2.6.31.orig/drivers/bluetooth/dtl1_cs.c +++ linux-ec2-2.6.31/drivers/bluetooth/dtl1_cs.c @@ -299,7 +299,10 @@ int iir, lsr; irqreturn_t r = IRQ_NONE; - BUG_ON(!info->hdev); + if (!info || !info->hdev) { + BT_ERR("Call of irq %d for unknown device", irq); + return IRQ_NONE; + } iobase = info->p_dev->io.BasePort1; --- linux-ec2-2.6.31.orig/drivers/bluetooth/bluecard_cs.c +++ linux-ec2-2.6.31/drivers/bluetooth/bluecard_cs.c @@ -503,7 +503,10 @@ unsigned int iobase; unsigned char reg; - BUG_ON(!info->hdev); + if (!info || !info->hdev) { + BT_ERR("Call of irq %d for unknown device", irq); + return IRQ_NONE; + } if (!test_bit(CARD_READY, &(info->hw_state))) return IRQ_HANDLED; --- linux-ec2-2.6.31.orig/drivers/bluetooth/btuart_cs.c +++ linux-ec2-2.6.31/drivers/bluetooth/btuart_cs.c @@ -295,7 +295,10 @@ int iir, lsr; irqreturn_t r = IRQ_NONE; - BUG_ON(!info->hdev); + if (!info || !info->hdev) { + BT_ERR("Call of irq %d for unknown device", irq); + return IRQ_NONE; + } iobase = info->p_dev->io.BasePort1; --- linux-ec2-2.6.31.orig/drivers/bluetooth/bt3c_cs.c +++ linux-ec2-2.6.31/drivers/bluetooth/bt3c_cs.c @@ -345,7 +345,10 @@ int iir; irqreturn_t r = IRQ_NONE; - BUG_ON(!info->hdev); + if (!info || !info->hdev) { + BT_ERR("Call of irq %d for unknown device", irq); + return IRQ_NONE; + } iobase = 
info->p_dev->io.BasePort1; --- linux-ec2-2.6.31.orig/drivers/dma/at_hdmac.c +++ linux-ec2-2.6.31/drivers/dma/at_hdmac.c @@ -813,7 +813,7 @@ dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", cookie, done ? *done : 0, used ? *used : 0); - spin_lock_bh(atchan->lock); + spin_lock_bh(&atchan->lock); last_complete = atchan->completed_cookie; last_used = chan->cookie; @@ -828,7 +828,7 @@ ret = dma_async_is_complete(cookie, last_complete, last_used); } - spin_unlock_bh(atchan->lock); + spin_unlock_bh(&atchan->lock); if (done) *done = last_complete; --- linux-ec2-2.6.31.orig/drivers/watchdog/riowd.c +++ linux-ec2-2.6.31/drivers/watchdog/riowd.c @@ -206,7 +206,7 @@ dev_set_drvdata(&op->dev, p); riowd_device = p; - err = 0; + return 0; out_iounmap: of_iounmap(&op->resource[0], p->regs, 2); --- linux-ec2-2.6.31.orig/drivers/pci/pci.c +++ linux-ec2-2.6.31/drivers/pci/pci.c @@ -380,7 +380,12 @@ * Restore the BAR values for a given device, so as to make it * accessible by its driver. */ +#ifndef CONFIG_XEN static void +#else +EXPORT_SYMBOL_GPL(pci_restore_bars); +void +#endif pci_restore_bars(struct pci_dev *dev) { int i; @@ -2574,6 +2579,13 @@ */ int pci_is_reassigndev(struct pci_dev *dev) { +#ifdef CONFIG_PCI_GUESTDEV + int result; + + result = pci_is_guestdev_to_reassign(dev); + if (result) + return result; +#endif /* CONFIG_PCI_GUESTDEV */ return (pci_specified_resource_alignment(dev) != 0); } @@ -2650,6 +2662,11 @@ return 0; } +void __weak pci_fixup_cardbus(struct pci_bus *bus) +{ +} +EXPORT_SYMBOL(pci_fixup_cardbus); + static int __init pci_setup(char *str) { while (str) { --- linux-ec2-2.6.31.orig/drivers/pci/quirks.c +++ linux-ec2-2.6.31/drivers/pci/quirks.c @@ -1201,6 +1201,7 @@ switch(dev->subsystem_device) { case 0x00b8: /* Compaq Evo D510 CMT */ case 0x00b9: /* Compaq Evo D510 SFF */ + case 0x00ba: /* Compaq Evo D510 USDT */ /* Motherboard doesn't have Host bridge * subvendor/subdevice IDs and on-board VGA * controller is disabled if an AGP card is @@ -2382,8 +2383,10 @@ } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all); static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev) { @@ -2492,6 +2495,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e7, quirk_i82576_sriov); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e8, quirk_i82576_sriov); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x150a, quirk_i82576_sriov); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x150d, quirk_i82576_sriov); #endif /* CONFIG_PCI_IOV */ --- linux-ec2-2.6.31.orig/drivers/pci/setup-res.c +++ linux-ec2-2.6.31/drivers/pci/setup-res.c @@ -144,7 +144,7 @@ size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = pci_resource_alignment(dev, res); + align = resource_alignment(res); /* First, try exact prefetching match.. 
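* (i.e. a parent window whose prefetch attributes match the resource's)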
*/ ret = pci_bus_alloc_resource(bus, res, size, align, min, @@ -178,7 +178,7 @@ struct pci_bus *bus; int ret; - align = pci_resource_alignment(dev, res); + align = resource_alignment(res); if (!align) { dev_info(&dev->dev, "BAR %d: can't allocate resource (bogus " "alignment) %pR flags %#lx\n", @@ -259,7 +259,7 @@ if (!(r->flags) || r->parent) continue; - r_align = pci_resource_alignment(dev, r); + r_align = resource_alignment(r); if (!r_align) { dev_warn(&dev->dev, "BAR %d: bogus alignment " "%pR flags %#lx\n", @@ -271,7 +271,7 @@ struct resource_list *ln = list->next; if (ln) - align = pci_resource_alignment(ln->dev, ln->res); + align = resource_alignment(ln->res); if (r_align > align) { tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); --- linux-ec2-2.6.31.orig/drivers/pci/msi-xen.c +++ linux-ec2-2.6.31/drivers/pci/msi-xen.c @@ -0,0 +1,837 @@ +/* + * File: msi-xen.c + * Purpose: PCI Message Signaled Interrupt (MSI) + * + * Copyright (C) 2003-2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + */ + +#include <linux/mm.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/msi.h> +#include <linux/smp.h> +#include <linux/errno.h> + +#include <xen/evtchn.h> + +#include <asm/errno.h> +#include <asm/io.h> + +#include "pci.h" +#include "msi.h" + +static int pci_msi_enable = 1; + +static LIST_HEAD(msi_dev_head); +DEFINE_SPINLOCK(msi_dev_lock); + +struct msi_dev_list { + struct pci_dev *dev; + struct list_head list; + spinlock_t pirq_list_lock; + struct list_head pirq_list_head; + /* Store default pre-assigned irq */ + unsigned int default_irq; +}; + +struct msi_pirq_entry { + struct list_head list; + int pirq; + int entry_nr; +}; + +/* Arch hooks */ + +#ifndef arch_msi_check_device +int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +{ + return 0; +} +#endif + +#ifndef arch_setup_msi_irqs +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + struct msi_desc *entry; + int ret; + + /* + * If an architecture wants to support multiple MSI, it needs to + * override arch_setup_msi_irqs() + */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + + list_for_each_entry(entry, &dev->msi_list, list) { + ret = arch_setup_msi_irq(dev, entry); + if (ret < 0) + return ret; + if (ret > 0) + return -ENOSPC; + } + + return 0; +} +#endif + +#ifndef arch_teardown_msi_irqs +void arch_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + + list_for_each_entry(entry, &dev->msi_list, list) { + int i, nvec; + if (entry->irq == 0) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + arch_teardown_msi_irq(entry->irq + i); + } +} +#endif + +static void msi_set_enable(struct pci_dev *dev, int pos, int enable) +{ + u16 control; + + BUG_ON(!pos); + + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + control &= ~PCI_MSI_FLAGS_ENABLE; + if (enable) + control |= PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); +} + +static void msix_set_enable(struct pci_dev *dev, int enable) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (pos) { + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); + control &= ~PCI_MSIX_FLAGS_ENABLE; + if (enable) + control |= PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + } +} + +static struct msi_dev_list *get_msi_dev_pirq_list(struct pci_dev *dev) +{ + struct msi_dev_list *msi_dev_list, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&msi_dev_lock, flags); + + list_for_each_entry(msi_dev_list, &msi_dev_head, list) + if ( msi_dev_list->dev == dev ) +
ret = msi_dev_list; + + if ( ret ) { + spin_unlock_irqrestore(&msi_dev_lock, flags); + return ret; + } + + /* No msi_dev entry has been allocated for this device yet. */ + ret = kzalloc(sizeof(struct msi_dev_list), GFP_ATOMIC); + + /* Failed to allocate msi_dev structure */ + if ( !ret ) { + spin_unlock_irqrestore(&msi_dev_lock, flags); + return NULL; + } + + ret->dev = dev; + spin_lock_init(&ret->pirq_list_lock); + INIT_LIST_HEAD(&ret->pirq_list_head); + list_add_tail(&ret->list, &msi_dev_head); + spin_unlock_irqrestore(&msi_dev_lock, flags); + return ret; +} + +static int attach_pirq_entry(int pirq, int entry_nr, + struct msi_dev_list *msi_dev_entry) +{ + struct msi_pirq_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + unsigned long flags; + + if (!entry) + return -ENOMEM; + entry->pirq = pirq; + entry->entry_nr = entry_nr; + spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags); + list_add_tail(&entry->list, &msi_dev_entry->pirq_list_head); + spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags); + return 0; +} + +static void detach_pirq_entry(int entry_nr, + struct msi_dev_list *msi_dev_entry) +{ + unsigned long flags; + struct msi_pirq_entry *pirq_entry; + + list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) { + if (pirq_entry->entry_nr == entry_nr) { + spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags); + list_del(&pirq_entry->list); + spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags); + kfree(pirq_entry); + return; + } + } +} + +/* + * pciback will provide the device's owner + */ +static int (*get_owner)(struct pci_dev *dev); + +int register_msi_get_owner(int (*func)(struct pci_dev *dev)) +{ + if (get_owner) { + printk(KERN_WARNING "register msi_get_owner again\n"); + return -EEXIST; + } + get_owner = func; + return 0; +} +EXPORT_SYMBOL(register_msi_get_owner); + +int unregister_msi_get_owner(int (*func)(struct pci_dev *dev)) +{ + if (get_owner != func) + return -EINVAL; + get_owner = NULL; + return 0; +} +EXPORT_SYMBOL(unregister_msi_get_owner); + +static int msi_get_dev_owner(struct pci_dev *dev) +{ + int owner; + + BUG_ON(!is_initial_xendomain()); + if (get_owner && (owner = get_owner(dev)) >= 0) { + dev_info(&dev->dev, "get owner: %x \n", owner); + return owner; + } + + return DOMID_SELF; +} + +static int msi_unmap_pirq(struct pci_dev *dev, int pirq) +{ + struct physdev_unmap_pirq unmap; + int rc; + + unmap.domid = msi_get_dev_owner(dev); + /* See the comments in msi_map_vector: the input parameter pirq is + * an irq number only if the device belongs to dom0 itself. + */ + unmap.pirq = (unmap.domid != DOMID_SELF) + ?
pirq : evtchn_get_xen_pirq(pirq); + + if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap))) + dev_warn(&dev->dev, "unmap irq %x failed\n", pirq); + + if (rc < 0) + return rc; + + if (unmap.domid == DOMID_SELF) + evtchn_map_pirq(pirq, 0); + + return 0; +} + +static u64 find_table_base(struct pci_dev *dev, int pos) +{ + u8 bar; + u32 reg; + unsigned long flags; + + pci_read_config_dword(dev, msix_table_offset_reg(pos), &reg); + bar = reg & PCI_MSIX_FLAGS_BIRMASK; + + flags = pci_resource_flags(dev, bar); + if (flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET | IORESOURCE_BUSY)) + return 0; + + return pci_resource_start(dev, bar); +} + +/* + * Protected by msi_lock + */ +static int msi_map_vector(struct pci_dev *dev, int entry_nr, u64 table_base) +{ + struct physdev_map_pirq map_irq; + int rc; + domid_t domid = DOMID_SELF; + + domid = msi_get_dev_owner(dev); + + map_irq.domid = domid; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + map_irq.devfn = dev->devfn; + map_irq.entry_nr = entry_nr; + map_irq.table_base = table_base; + + if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq))) + dev_warn(&dev->dev, "map irq failed\n"); + + if (rc < 0) + return rc; + /* This happens when MSI support is not enabled in older Xen. */ + if (rc == 0 && map_irq.pirq < 0) + return -ENOSYS; + + BUG_ON(map_irq.pirq <= 0); + + /* If mapping of this particular MSI is on behalf of another domain, + * we do not need to get an irq in dom0. This also implies: + * dev->irq in dom0 will be 'Xen pirq' if this device belongs + * to another domain, and will be 'Linux irq' if it belongs to dom0. + */ + if (domid == DOMID_SELF) { + rc = evtchn_map_pirq(-1, map_irq.pirq); + dev_printk(KERN_DEBUG, &dev->dev, + "irq %d (%d) for MSI/MSI-X\n", + rc, map_irq.pirq); + return rc; + } + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for dom%d MSI/MSI-X\n", + map_irq.pirq, domid); + return map_irq.pirq; +} + +static void pci_intx_for_msi(struct pci_dev *dev, int enable) +{ + if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) + pci_intx(dev, enable); +} + +void pci_restore_msi_state(struct pci_dev *dev) +{ + int rc; + struct physdev_restore_msi restore; + + if (!dev->msi_enabled && !dev->msix_enabled) + return; + + pci_intx_for_msi(dev, 0); + if (dev->msi_enabled) { + int pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + + msi_set_enable(dev, pos, 0); + } + if (dev->msix_enabled) + msix_set_enable(dev, 0); + + restore.bus = dev->bus->number; + restore.devfn = dev->devfn; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore); + WARN(rc && rc != -ENOSYS, "restore_msi -> %d\n", rc); +} +EXPORT_SYMBOL_GPL(pci_restore_msi_state); + +/** + * msi_capability_init - configure device's MSI capability structure + * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: number of interrupts to allocate + * + * Setup the MSI capability structure of the device with the requested + * number of interrupts. A return value of zero indicates the successful + * setup of an entry with the new MSI irq. A negative return value indicates + * an error, and a positive return value indicates the number of interrupts + * which could have been allocated.
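+ * + * Drivers reach this path through pci_enable_msi() (or + * pci_enable_msi_block()); a minimal sketch of a probe function, assuming + * a hypothetical handler mydrv_isr() and cookie mydrv that are not part + * of this file: + * + * if (pci_enable_msi(pdev) == 0) + * err = request_irq(pdev->irq, mydrv_isr, 0, "mydrv", mydrv); + * else + * err = request_irq(pdev->irq, mydrv_isr, IRQF_SHARED, "mydrv", mydrv);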
+ */ +static int msi_capability_init(struct pci_dev *dev, int nvec) +{ + int pos, pirq; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + msi_set_enable(dev, pos, 0); /* Disable MSI during set up */ + + pci_read_config_word(dev, msi_control_reg(pos), &control); + + WARN_ON(nvec > 1); /* XXX */ + pirq = msi_map_vector(dev, 0, 0); + if (pirq < 0) + return -EBUSY; + + /* Set MSI enabled bits */ + pci_intx_for_msi(dev, 0); + msi_set_enable(dev, pos, 1); + dev->msi_enabled = 1; + + dev->irq = pirq; + return 0; +} + +/** + * msix_capability_init - configure device's MSI-X capability + * @dev: pointer to the pci_dev data structure of MSI-X device function + * @entries: pointer to an array of struct msix_entry entries + * @nvec: number of @entries + * + * Setup the MSI-X capability structure of device function with a + * single MSI-X irq. A return of zero indicates the successful setup of + * requested MSI-X entries with allocated irqs or non-zero for otherwise. + **/ +static int msix_capability_init(struct pci_dev *dev, + struct msix_entry *entries, int nvec) +{ + u64 table_base; + int pirq, i, j, mapped, pos; + u16 control; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + struct msi_pirq_entry *pirq_entry; + + if (!msi_dev_entry) + return -ENOMEM; + + msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); + + /* Ensure MSI-X is disabled while it is set up */ + control &= ~PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + + table_base = find_table_base(dev, pos); + if (!table_base) + return -ENODEV; + + /* + * Some devices require MSI-X to be enabled before we can touch the + * MSI-X registers. We need to mask all the vectors to prevent + * interrupts coming in before they're fully set up. + */ + control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + + for (i = 0; i < nvec; i++) { + mapped = 0; + list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) { + if (pirq_entry->entry_nr == entries[i].entry) { + dev_warn(&dev->dev, + "msix entry %d was not freed\n", + entries[i].entry); + (entries + i)->vector = pirq_entry->pirq; + mapped = 1; + break; + } + } + if (mapped) + continue; + pirq = msi_map_vector(dev, entries[i].entry, table_base); + if (pirq < 0) + break; + attach_pirq_entry(pirq, entries[i].entry, msi_dev_entry); + (entries + i)->vector = pirq; + } + + if (i != nvec) { + int avail = i - 1; + for (j = --i; j >= 0; j--) { + msi_unmap_pirq(dev, entries[j].vector); + detach_pirq_entry(entries[j].entry, msi_dev_entry); + entries[j].vector = 0; + } + /* If we had some success report the number of irqs + * we succeeded in setting up. + */ + if (avail <= 0) + avail = -EBUSY; + return avail; + } + + /* Set MSI-X enabled bits and unmask the function */ + pci_intx_for_msi(dev, 0); + dev->msix_enabled = 1; + + control &= ~PCI_MSIX_FLAGS_MASKALL; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + + return 0; +} + +/** + * pci_msi_check_device - check whether MSI may be enabled on a device + * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: how many MSIs have been requested ? + * @type: are we checking for MSI or MSI-X ? + * + * Look at global flags, the device itself, and its parent busses + * to determine if MSI/-X are supported for the device. 
If MSI/-X is + * supported return 0, else return an error code. + **/ +static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type) +{ + struct pci_bus *bus; + int ret; + + /* MSI must be globally enabled and supported by the device */ + if (!pci_msi_enable || !dev || dev->no_msi) + return -EINVAL; + + /* + * You can't ask to have 0 or less MSIs configured. + * a) it's stupid .. + * b) the list manipulation code assumes nvec >= 1. + */ + if (nvec < 1) + return -ERANGE; + + /* Any bridge which does NOT route MSI transactions from it's + * secondary bus to it's primary bus must set NO_MSI flag on + * the secondary pci_bus. + * We expect only arch-specific PCI host bus controller driver + * or quirks for specific PCI bridges to be setting NO_MSI. + */ + for (bus = dev->bus; bus; bus = bus->parent) + if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) + return -EINVAL; + + ret = arch_msi_check_device(dev, nvec, type); + if (ret) + return ret; + + if (!pci_find_capability(dev, type)) + return -EINVAL; + + return 0; +} + +/** + * pci_enable_msi_block - configure device's MSI capability structure + * @dev: device to configure + * @nvec: number of interrupts to configure + * + * Allocate IRQs for a device with the MSI capability. + * This function returns a negative errno if an error occurs. If it + * is unable to allocate the number of interrupts requested, it returns + * the number of interrupts it might be able to allocate. If it successfully + * allocates at least the number of interrupts requested, it returns 0 and + * updates the @dev's irq member to the lowest new interrupt number; the + * other interrupt numbers allocated to this device are consecutive. + */ +extern int pci_frontend_enable_msi(struct pci_dev *dev); +int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) +{ + int temp, status, pos, maxvec; + u16 msgctl; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + if (nvec > maxvec) + return maxvec; + + status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI); + if (status) + return status; + +#ifdef CONFIG_XEN_PCIDEV_FRONTEND + if (!is_initial_xendomain()) + { + int ret; + + temp = dev->irq; + WARN_ON(nvec > 1); /* XXX */ + ret = pci_frontend_enable_msi(dev); + if (ret) + return ret; + + dev->irq = evtchn_map_pirq(-1, dev->irq); + msi_dev_entry->default_irq = temp; + + return ret; + } +#endif + + temp = dev->irq; + + /* Check whether driver already requested MSI-X irqs */ + if (dev->msix_enabled) { + dev_info(&dev->dev, "can't enable MSI " + "(MSI-X already enabled)\n"); + return -EINVAL; + } + + status = msi_capability_init(dev, nvec); + if ( !status ) + msi_dev_entry->default_irq = temp; + + return status; +} +EXPORT_SYMBOL(pci_enable_msi_block); + +extern void pci_frontend_disable_msi(struct pci_dev* dev); +void pci_msi_shutdown(struct pci_dev *dev) +{ + int pirq, pos; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + + if (!pci_msi_enable || !dev) + return; + +#ifdef CONFIG_XEN_PCIDEV_FRONTEND + if (!is_initial_xendomain()) { + evtchn_map_pirq(dev->irq, 0); + pci_frontend_disable_msi(dev); + dev->irq = msi_dev_entry->default_irq; + return; + } +#endif + + if (!dev->msi_enabled) + return; + + pirq = dev->irq; + /* Restore dev->irq to its default pin-assertion vector */ + dev->irq = msi_dev_entry->default_irq; + msi_unmap_pirq(dev, pirq); + 
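+	/* The pirq and, for a dom0-owned device, its event-channel
+	 * binding were released by msi_unmap_pirq() above. */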
+ /* Disable MSI mode */ + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + msi_set_enable(dev, pos, 0); + pci_intx_for_msi(dev, 1); + dev->msi_enabled = 0; +} + +void pci_disable_msi(struct pci_dev* dev) +{ + pci_msi_shutdown(dev); +} +EXPORT_SYMBOL(pci_disable_msi); + +/** + * pci_msix_table_size - return the number of device's MSI-X table entries + * @dev: pointer to the pci_dev data structure of MSI-X device function + */ +int pci_msix_table_size(struct pci_dev *dev) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (!pos) + return 0; + + pci_read_config_word(dev, msi_control_reg(pos), &control); + return multi_msix_capable(control); +} + +/** + * pci_enable_msix - configure device's MSI-X capability structure + * @dev: pointer to the pci_dev data structure of MSI-X device function + * @entries: pointer to an array of MSI-X entries + * @nvec: number of MSI-X irqs requested for allocation by device driver + * + * Setup the MSI-X capability structure of device function with the number + * of requested irqs upon its software driver call to request for + * MSI-X mode enabled on its hardware device function. A return of zero + * indicates the successful configuration of MSI-X capability structure + * with new allocated MSI-X irqs. A return of < 0 indicates a failure. + * Or a return of > 0 indicates that driver request is exceeding the number + * of irqs or MSI-X vectors available. Driver should use the returned value to + * re-send its request. + **/ +extern int pci_frontend_enable_msix(struct pci_dev *dev, + struct msix_entry *entries, int nvec); +int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) +{ + int status, nr_entries; + int i, j, temp; + struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev); + + if (!entries) + return -EINVAL; + +#ifdef CONFIG_XEN_PCIDEV_FRONTEND + if (!is_initial_xendomain()) { + struct msi_pirq_entry *pirq_entry; + int ret, irq; + + temp = dev->irq; + ret = pci_frontend_enable_msix(dev, entries, nvec); + if (ret) { + dev_warn(&dev->dev, + "got %x from frontend_enable_msix\n", ret); + return ret; + } + msi_dev_entry->default_irq = temp; + + for (i = 0; i < nvec; i++) { + int mapped = 0; + + list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) { + if (pirq_entry->entry_nr == entries[i].entry) { + irq = pirq_entry->pirq; + BUG_ON(entries[i].vector != evtchn_get_xen_pirq(irq)); + entries[i].vector = irq; + mapped = 1; + break; + } + } + if (mapped) + continue; + irq = evtchn_map_pirq(-1, entries[i].vector); + attach_pirq_entry(irq, entries[i].entry, msi_dev_entry); + entries[i].vector = irq; + } + return 0; + } +#endif + + status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX); + if (status) + return status; + + nr_entries = pci_msix_table_size(dev); + if (nvec > nr_entries) + return nr_entries; + + /* Check for any invalid entries */ + for (i = 0; i < nvec; i++) { + if (entries[i].entry >= nr_entries) + return -EINVAL; /* invalid entry */ + for (j = i + 1; j < nvec; j++) { + if (entries[i].entry == entries[j].entry) + return -EINVAL; /* duplicate entry */ + } + } + + temp = dev->irq; + /* Check whether driver already requested for MSI vector */ + if (dev->msi_enabled) { + dev_info(&dev->dev, "can't enable MSI-X " + "(MSI IRQ already assigned)\n"); + return -EINVAL; + } + + status = msix_capability_init(dev, entries, nvec); + + if ( !status ) + msi_dev_entry->default_irq = temp; + + return status; +} +EXPORT_SYMBOL(pci_enable_msix); + +extern void 
pci_frontend_disable_msix(struct pci_dev* dev);
+void pci_msix_shutdown(struct pci_dev* dev)
+{
+	if (!pci_msi_enable)
+		return;
+	if (!dev)
+		return;
+
+#ifdef CONFIG_XEN_PCIDEV_FRONTEND
+	if (!is_initial_xendomain()) {
+		struct msi_dev_list *msi_dev_entry;
+		struct msi_pirq_entry *pirq_entry, *tmp;
+
+		pci_frontend_disable_msix(dev);
+
+		msi_dev_entry = get_msi_dev_pirq_list(dev);
+		list_for_each_entry_safe(pirq_entry, tmp,
+					 &msi_dev_entry->pirq_list_head, list) {
+			evtchn_map_pirq(pirq_entry->pirq, 0);
+			list_del(&pirq_entry->list);
+			kfree(pirq_entry);
+		}
+
+		dev->irq = msi_dev_entry->default_irq;
+		return;
+	}
+#endif
+
+	if (!dev->msix_enabled)
+		return;
+
+	msi_remove_pci_irq_vectors(dev);
+
+	/* Disable MSI-X mode */
+	msix_set_enable(dev, 0);
+	pci_intx_for_msi(dev, 1);
+	dev->msix_enabled = 0;
+}
+
+void pci_disable_msix(struct pci_dev* dev)
+{
+	pci_msix_shutdown(dev);
+}
+EXPORT_SYMBOL(pci_disable_msix);
+
+/**
+ * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
+ * @dev: pointer to the pci_dev data structure of MSI(X) device function
+ *
+ * Called during hot-plug removal of the device function. All MSI/MSI-X
+ * irqs previously assigned to this device function are reclaimed to the
+ * unused state and may be reused later.
+ **/
+void msi_remove_pci_irq_vectors(struct pci_dev* dev)
+{
+	unsigned long flags;
+	struct msi_dev_list *msi_dev_entry;
+	struct msi_pirq_entry *pirq_entry, *tmp;
+
+	if (!pci_msi_enable || !dev)
+		return;
+
+	msi_dev_entry = get_msi_dev_pirq_list(dev);
+
+	spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
+	if (!list_empty(&msi_dev_entry->pirq_list_head))
+		list_for_each_entry_safe(pirq_entry, tmp,
+					 &msi_dev_entry->pirq_list_head, list) {
+			msi_unmap_pirq(dev, pirq_entry->pirq);
+			list_del(&pirq_entry->list);
+			kfree(pirq_entry);
+		}
+	spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
+	dev->irq = msi_dev_entry->default_irq;
+}
+
+void pci_no_msi(void)
+{
+	pci_msi_enable = 0;
+}
+
+/**
+ * pci_msi_enabled - is MSI enabled?
+ *
+ * Returns true if MSI has not been disabled by the command-line option
+ * pci=nomsi.
+ **/
+int pci_msi_enabled(void)
+{
+	return pci_msi_enable;
+}
+EXPORT_SYMBOL(pci_msi_enabled);
+
+void pci_msi_init_pci_dev(struct pci_dev *dev)
+{
+#ifndef CONFIG_XEN
+	INIT_LIST_HEAD(&dev->msi_list);
+#endif
+}
--- linux-ec2-2.6.31.orig/drivers/pci/reserve.c
+++ linux-ec2-2.6.31/drivers/pci/reserve.c
@@ -0,0 +1,143 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (c) 2009 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include <asm/setup.h>
+
+static char pci_reserve_param[COMMAND_LINE_SIZE];
+
+/* pci_reserve=	[PCI]
+ * Format: [<sbdf>[+io<size>][+mem<size>]][,<sbdf>...]
+ *	sizes are hexadecimal, e.g. pci_reserve=0000:01:00.0+io100+mem100000
+ * Format of sbdf: [<segment>:]<bus>:<dev>.<func>
+ */ +static int pci_reserve_parse_size(const char *str, + unsigned long *io_size, + unsigned long *mem_size) +{ + if (sscanf(str, "io%lx", io_size) == 1 || + sscanf(str, "IO%lx", io_size) == 1) + return 0; + + if (sscanf(str, "mem%lx", mem_size) == 1 || + sscanf(str, "MEM%lx", mem_size) == 1) + return 0; + + return -EINVAL; +} + +static int pci_reserve_parse_one(const char *str, + int *seg, int *bus, int *dev, int *func, + unsigned long *io_size, + unsigned long *mem_size) +{ + char *p; + + *io_size = 0; + *mem_size = 0; + + if (sscanf(str, "%x:%x:%x.%x", seg, bus, dev, func) != 4) { + *seg = 0; + if (sscanf(str, "%x:%x.%x", bus, dev, func) != 3) { + return -EINVAL; + } + } + + p = strchr(str, '+'); + if (p == NULL) + return -EINVAL; + p++; + if (pci_reserve_parse_size(p, io_size, mem_size)) + return -EINVAL; + + p = strchr(str, '+'); + if (p != NULL) { + p++; + pci_reserve_parse_size(p, io_size, mem_size); + } + return 0; +} + +static unsigned long pci_reserve_size(struct pci_bus *pbus, int flags) +{ + char *sp; + char *ep; + + int seg; + int bus; + int dev; + int func; + + unsigned long io_size; + unsigned long mem_size; + + sp = pci_reserve_param; + + do { + ep = strchr(sp, ','); + if (ep) + *ep = '\0'; /* chomp */ + + if (pci_reserve_parse_one(sp, &seg, &bus, &dev, &func, + &io_size, &mem_size) == 0) { + if (pci_domain_nr(pbus) == seg && + pbus->number == bus && + PCI_SLOT(pbus->self->devfn) == dev && + PCI_FUNC(pbus->self->devfn) == func) { + switch (flags) { + case IORESOURCE_IO: + return io_size; + case IORESOURCE_MEM: + return mem_size; + default: + break; + } + } + } + + if (ep) { + *ep = ','; /* restore chomp'ed ',' for later */ + ep++; + } + sp = ep; + } while (ep); + + return 0; +} + +unsigned long pci_reserve_size_io(struct pci_bus *pbus) +{ + return pci_reserve_size(pbus, IORESOURCE_IO); +} + +unsigned long pci_reserve_size_mem(struct pci_bus *pbus) +{ + return pci_reserve_size(pbus, IORESOURCE_MEM); +} + +static int __init pci_reserve_setup(char *str) +{ + if (strlen(str) >= sizeof(pci_reserve_param)) + return 0; + strlcpy(pci_reserve_param, str, sizeof(pci_reserve_param)); + return 1; +} +__setup("pci_reserve=", pci_reserve_setup); --- linux-ec2-2.6.31.orig/drivers/pci/guestdev.c +++ linux-ec2-2.6.31/drivers/pci/guestdev.c @@ -0,0 +1,887 @@ +/* + * Copyright (c) 2008, 2009 NEC Corporation. + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ */
+
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <asm/setup.h>
+
+#define HID_LEN 8
+#define UID_LEN 8
+#define DEV_LEN 2
+#define FUNC_LEN 1
+#define DEV_NUM_MAX 31
+#define FUNC_NUM_MAX 7
+#define INVALID_SEG (-1)
+#define INVALID_BBN (-1)
+#define GUESTDEV_STR_MAX 128
+
+#define GUESTDEV_FLAG_TYPE_MASK 0x3
+#define GUESTDEV_FLAG_DEVICEPATH 0x1
+#define GUESTDEV_FLAG_SBDF 0x2
+
+#define GUESTDEV_OPT_IOMUL 0x1
+
+struct guestdev {
+	int flags;
+	int options;
+	struct list_head root_list;
+	union {
+		struct devicepath {
+			char hid[HID_LEN + 1];
+			char uid[UID_LEN + 1];
+			int seg;
+			int bbn;
+			struct devicepath_node *child;
+		} devicepath;
+		struct sbdf {
+			int seg;
+			int bus;
+			int dev;
+			int func;
+		} sbdf;
+	} u;
+};
+
+struct devicepath_node {
+	int dev;
+	int func;
+	struct devicepath_node *child;
+};
+
+struct pcidev_sbdf {
+	int seg;
+	int bus;
+	struct pcidev_sbdf_node *child;
+};
+
+struct pcidev_sbdf_node {
+	int dev;
+	int func;
+	struct pcidev_sbdf_node *child;
+};
+
+static char guestdev_param[COMMAND_LINE_SIZE];
+static LIST_HEAD(guestdev_list);
+
+/* Get hid and uid */
+static int __init pci_get_hid_uid(char *str, char *hid, char *uid)
+{
+	char *sp, *ep;
+	int len;
+
+	sp = str;
+	ep = strchr(sp, ':');
+	if (!ep) {
+		ep = strchr(sp, '-');
+		if (!ep)
+			goto format_err_end;
+	}
+	/* hid length */
+	len = ep - sp;
+	if (len <= 0 || HID_LEN < len)
+		goto format_err_end;
+
+	strlcpy(hid, sp, len + 1);
+
+	if (*ep == '-') { /* no uid */
+		uid[0] = '\0';
+		return TRUE;
+	}
+
+	sp = ep + 1;
+	ep = strchr(sp, '-');
+	if (!ep)
+		ep = strchr(sp, '\0');
+
+	/* uid length */
+	len = ep - sp;
+	if (len <= 0 || UID_LEN < len)
+		goto format_err_end;
+
+	strlcpy(uid, sp, len + 1);
+	return TRUE;
+
+format_err_end:
+	return FALSE;
+}
+
+/* Get device and function */
+static int __init pci_get_dev_func(char *str, int *dev, int *func)
+{
+	if (sscanf(str, "%02x.%01x", dev, func) != 2)
+		goto format_err_end;
+
+	if (*dev < 0 || DEV_NUM_MAX < *dev)
+		goto format_err_end;
+
+	if (*func < 0 || FUNC_NUM_MAX < *func)
+		goto format_err_end;
+
+	return TRUE;
+
+format_err_end:
+	return FALSE;
+}
+
+/* Check the extended guestdev parameter format for errors */
+static int __init pci_check_extended_guestdev_format(char *str)
+{
+	int flg;
+	char *p;
+
+	/* Check extended format */
+	if (strpbrk(str, "(|)") == NULL)
+		return TRUE;
+
+	flg = 0;
+	p = str;
+	while (*p) {
+		switch (*p) {
+		case '(':
+			/* Check nesting error */
+			if (flg != 0)
+				goto format_err_end;
+			flg = 1;
+			/* Error if '(' is the first character or the
+			   previous character is not '-'. */
+			if (p == str || *(p - 1) != '-')
+				goto format_err_end;
+			break;
+		case ')':
+			/* Check nesting error */
+			if (flg != 1)
+				goto format_err_end;
+			flg = 0;
+			/* Error if ')' is not the last character. */
+			if (*(p + 1) != '\0')
+				goto format_err_end;
+			break;
+		case '|':
+			/* '|' is only valid inside '(' and ')' */
+			if (flg != 1)
+				goto format_err_end;
+			break;
+		default:
+			break;
+		}
+		p++;
+	}
+	/* Error if '(' and ')' are unbalanced */
+	if (flg != 0)
+		goto format_err_end;
+	return TRUE;
+
+format_err_end:
+	printk(KERN_ERR
+	       "PCI: The format of the guestdev parameter is illegal.
[%s]\n", + str); + return FALSE; +} + +/* Make guestdev strings */ +static void pci_make_guestdev_str(struct guestdev *gdev, + char *gdev_str, int buf_size) +{ + struct devicepath_node *node; + int count; + + switch (gdev->flags & GUESTDEV_FLAG_TYPE_MASK) { + case GUESTDEV_FLAG_DEVICEPATH: + memset(gdev_str, 0, buf_size); + + if (strlen(gdev->u.devicepath.uid)) + count = snprintf(gdev_str, buf_size, "%s:%s", + gdev->u.devicepath.hid, + gdev->u.devicepath.uid); + else + count = snprintf(gdev_str, buf_size, "%s", + gdev->u.devicepath.hid); + if (count < 0) + return; + + node = gdev->u.devicepath.child; + while (node) { + gdev_str += count; + buf_size -= count; + if (buf_size <= 0) + return; + count = snprintf(gdev_str, buf_size, "-%02x.%01x", + node->dev, node->func); + if (count < 0) + return; + node = node->child; + } + break; + case GUESTDEV_FLAG_SBDF: + snprintf(gdev_str, buf_size, "%04x:%02x:%02x.%01x", + gdev->u.sbdf.seg, gdev->u.sbdf.bus, + gdev->u.sbdf.dev, gdev->u.sbdf.func); + break; + default: + BUG(); + } +} + +/* Free guestdev and nodes */ +static void __init pci_free_guestdev(struct guestdev *gdev) +{ + struct devicepath_node *node, *next; + + if (!gdev) + return; + if (gdev->flags & GUESTDEV_FLAG_DEVICEPATH) { + node = gdev->u.devicepath.child; + while (node) { + next = node->child; + kfree(node); + node = next; + } + } + list_del(&gdev->root_list); + kfree(gdev); +} + +/* Copy guestdev and nodes */ +struct guestdev __init *pci_copy_guestdev(struct guestdev *gdev_src) +{ + struct guestdev *gdev; + struct devicepath_node *node, *node_src, *node_upper; + + BUG_ON(!(gdev_src->flags & GUESTDEV_FLAG_DEVICEPATH)); + + gdev = kmalloc(sizeof(*gdev), GFP_KERNEL); + if (!gdev) + goto allocate_err_end; + + memset(gdev, 0, sizeof(*gdev)); + INIT_LIST_HEAD(&gdev->root_list); + gdev->flags = gdev_src->flags; + gdev->options = gdev_src->options; + strcpy(gdev->u.devicepath.hid, gdev_src->u.devicepath.hid); + strcpy(gdev->u.devicepath.uid, gdev_src->u.devicepath.uid); + gdev->u.devicepath.seg = gdev_src->u.devicepath.seg; + gdev->u.devicepath.bbn = gdev_src->u.devicepath.bbn; + + node_upper = NULL; + + node_src = gdev_src->u.devicepath.child; + while (node_src) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + goto allocate_err_end; + memset(node, 0, sizeof(*node)); + node->dev = node_src->dev; + node->func = node_src->func; + if (!node_upper) + gdev->u.devicepath.child = node; + else + node_upper->child = node; + node_upper = node; + node_src = node_src->child; + } + + return gdev; + +allocate_err_end: + if (gdev) + pci_free_guestdev(gdev); + printk(KERN_ERR "PCI: Failed to allocate memory.\n"); + return NULL; +} + +/* Make guestdev from path strings */ +static int __init pci_make_devicepath_guestdev(char *path_str, int options) +{ + char hid[HID_LEN + 1], uid[UID_LEN + 1]; + char *sp, *ep; + struct guestdev *gdev, *gdev_org; + struct devicepath_node *node, *node_tmp; + int dev, func, ret_val; + + ret_val = 0; + gdev = gdev_org = NULL; + sp = path_str; + /* Look for end of hid:uid'-' */ + ep = strchr(sp, '-'); + /* Only hid, uid. 
(No dev, func) */
+	if (!ep)
+		goto format_err_end;
+
+	memset(hid, 0, sizeof(hid));
+	memset(uid, 0, sizeof(uid));
+	if (!pci_get_hid_uid(sp, hid, uid))
+		goto format_err_end;
+
+	gdev_org = kmalloc(sizeof(*gdev_org), GFP_KERNEL);
+	if (!gdev_org)
+		goto allocate_err_end;
+	memset(gdev_org, 0, sizeof(*gdev_org));
+	INIT_LIST_HEAD(&gdev_org->root_list);
+	gdev_org->flags = GUESTDEV_FLAG_DEVICEPATH;
+	gdev_org->options = options;
+	strcpy(gdev_org->u.devicepath.hid, hid);
+	strcpy(gdev_org->u.devicepath.uid, uid);
+	gdev_org->u.devicepath.seg = INVALID_SEG;
+	gdev_org->u.devicepath.bbn = INVALID_BBN;
+
+	gdev = gdev_org;
+
+	sp = ep + 1;
+	ep = sp;
+	do {
+		if (*sp == '(') {
+			sp++;
+			if (strchr(sp, '|')) {
+				gdev = pci_copy_guestdev(gdev_org);
+				if (!gdev) {
+					ret_val = -ENOMEM;
+					goto end;
+				}
+			}
+			continue;
+		}
+		if (gdev && pci_get_dev_func(sp, &dev, &func)) {
+			node = kmalloc(sizeof(*node), GFP_KERNEL);
+			if (!node)
+				goto allocate_err_end;
+			memset(node, 0, sizeof(*node));
+			node->dev = dev;
+			node->func = func;
+			/* add node to the end of guestdev */
+			if (gdev->u.devicepath.child) {
+				node_tmp = gdev->u.devicepath.child;
+				while (node_tmp->child) {
+					node_tmp = node_tmp->child;
+				}
+				node_tmp->child = node;
+			} else
+				gdev->u.devicepath.child = node;
+		} else if (gdev) {
+			printk(KERN_ERR
+			       "PCI: Can't obtain dev# and func# from %s.\n",
+			       sp);
+			ret_val = -EINVAL;
+			if (gdev == gdev_org)
+				goto end;
+			pci_free_guestdev(gdev);
+			gdev = NULL;
+		}
+
+		ep = strpbrk(sp, "-|)");
+		if (!ep)
+			ep = strchr(sp, '\0');
+		/* Is *ep '|', ')' or '\0' ? */
+		if (*ep != '-') {
+			if (gdev)
+				list_add_tail(&gdev->root_list, &guestdev_list);
+			if (*ep == '|') {
+				/* Between '|' and '|' ? */
+				if (strchr(ep + 1, '|')) {
+					gdev = pci_copy_guestdev(gdev_org);
+					if (!gdev) {
+						ret_val = -ENOMEM;
+						goto end;
+					}
+				} else {
+					gdev = gdev_org;
+					gdev_org = NULL;
+				}
+			} else {
+				gdev_org = NULL;
+				gdev = NULL;
+			}
+		}
+		if (*ep == ')')
+			ep++;
+		sp = ep + 1;
+	} while (*ep != '\0');
+
+	goto end;
+
+format_err_end:
[%s]\n", + path_str); + ret_val = -EINVAL; + goto end; + +allocate_err_end: + printk(KERN_ERR "PCI: Failed to allocate memory.\n"); + ret_val = -ENOMEM; + goto end; + +end: + if (gdev_org && (gdev_org != gdev)) + pci_free_guestdev(gdev_org); + if (gdev) + pci_free_guestdev(gdev); + return ret_val; +} + +static int __init pci_make_sbdf_guestdev(char* str, int options) +{ + struct guestdev *gdev; + int seg, bus, dev, func; + + if (sscanf(str, "%x:%x:%x.%x", &seg, &bus, &dev, &func) != 4) { + seg = 0; + if (sscanf(str, "%x:%x.%x", &bus, &dev, &func) != 3) + return -EINVAL; + } + gdev = kmalloc(sizeof(*gdev), GFP_KERNEL); + if (!gdev) { + printk(KERN_ERR "PCI: Failed to allocate memory.\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&gdev->root_list); + gdev->flags = GUESTDEV_FLAG_SBDF; + gdev->options = options; + gdev->u.sbdf.seg = seg; + gdev->u.sbdf.bus = bus; + gdev->u.sbdf.dev = dev; + gdev->u.sbdf.func = func; + list_add_tail(&gdev->root_list, &guestdev_list); + return 0; +} + +static int __init pci_parse_options(const char *str) +{ + int options = 0; + char *ep; + + while (str) { + str++; + ep = strchr(str, '+'); + if (ep) + ep = '\0'; /* Chop */ + + if (!strcmp(str, "iomul")) + options |= GUESTDEV_OPT_IOMUL; + + str = ep; + } + return options; +} + +/* Parse guestdev parameter */ +static int __init pci_parse_guestdev(void) +{ + int len; + char *sp, *ep, *op; + int options; + struct list_head *head; + struct guestdev *gdev; + char path_str[GUESTDEV_STR_MAX]; + int ret_val = 0; + + len = strlen(guestdev_param); + if (len == 0) + return 0; + + sp = guestdev_param; + + do { + ep = strchr(sp, ','); + /* Chop */ + if (ep) + *ep = '\0'; + options = 0; + op = strchr(sp, '+'); + if (op && (!ep || op < ep)) { + options = pci_parse_options(op); + *op = '\0'; /* Chop */ + } + ret_val = pci_make_sbdf_guestdev(sp, options); + if (ret_val == -EINVAL) { + if (pci_check_extended_guestdev_format(sp)) { + ret_val = pci_make_devicepath_guestdev( + sp, options); + if (ret_val && ret_val != -EINVAL) + break; + } + } else if (ret_val) + break; + + if (ep) + ep++; + sp = ep; + } while (ep); + + list_for_each(head, &guestdev_list) { + gdev = list_entry(head, struct guestdev, root_list); + pci_make_guestdev_str(gdev, path_str, GUESTDEV_STR_MAX); + printk(KERN_DEBUG + "PCI: %s has been reserved for guest domain.\n", + path_str); + } + return 0; +} + +arch_initcall(pci_parse_guestdev); + +/* Get command line */ +static int __init pci_guestdev_setup(char *str) +{ + if (strlen(str) >= COMMAND_LINE_SIZE) + return 0; + strlcpy(guestdev_param, str, sizeof(guestdev_param)); + return 1; +} + +__setup("guestdev=", pci_guestdev_setup); + +/* Free sbdf and nodes */ +static void pci_free_sbdf(struct pcidev_sbdf *sbdf) +{ + struct pcidev_sbdf_node *node, *next; + + node = sbdf->child; + while (node) { + next = node->child; + kfree(node); + node = next; + } + /* Skip kfree(sbdf) */ +} + +/* Does PCI device belong to sub tree specified by guestdev with device path? 
*/ +typedef int (*pci_node_match_t)(const struct devicepath_node *gdev_node, + const struct pcidev_sbdf_node *sbdf_node, + int options); + +static int pci_node_match(const struct devicepath_node *gdev_node, + const struct pcidev_sbdf_node *sbdf_node, + int options_unused) +{ + return (gdev_node->dev == sbdf_node->dev && + gdev_node->func == sbdf_node->func); +} + +static int pci_is_in_devicepath_sub_tree(struct guestdev *gdev, + struct pcidev_sbdf *sbdf, + pci_node_match_t match) +{ + int seg, bbn; + struct devicepath_node *gdev_node; + struct pcidev_sbdf_node *sbdf_node; + + if (!gdev || !sbdf) + return FALSE; + + BUG_ON(!(gdev->flags & GUESTDEV_FLAG_DEVICEPATH)); + + /* Compare seg and bbn */ + if (gdev->u.devicepath.seg == INVALID_SEG || + gdev->u.devicepath.bbn == INVALID_BBN) { + if (acpi_pci_get_root_seg_bbn(gdev->u.devicepath.hid, + gdev->u.devicepath.uid, &seg, &bbn)) { + gdev->u.devicepath.seg = seg; + gdev->u.devicepath.bbn = bbn; + } else + return FALSE; + } + + if (gdev->u.devicepath.seg != sbdf->seg || + gdev->u.devicepath.bbn != sbdf->bus) + return FALSE; + + gdev_node = gdev->u.devicepath.child; + sbdf_node = sbdf->child; + + /* Compare dev and func */ + while (gdev_node) { + if (!sbdf_node) + return FALSE; + if (!match(gdev_node, sbdf_node, gdev->options)) + return FALSE; + gdev_node = gdev_node->child; + sbdf_node = sbdf_node->child; + } + return TRUE; +} + +/* Get sbdf from device */ +static int pci_get_sbdf_from_pcidev( + struct pci_dev *dev, struct pcidev_sbdf *sbdf) +{ + struct pcidev_sbdf_node *node; + + if (!dev) + return FALSE; + + for(;;) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) { + printk(KERN_ERR "PCI: Failed to allocate memory.\n"); + goto err_end; + } + memset(node, 0, sizeof(*node)); + node->dev = PCI_SLOT(dev->devfn); + node->func = PCI_FUNC(dev->devfn); + + if (!sbdf->child) + sbdf->child = node; + else { + node->child = sbdf->child; + sbdf->child = node; + } + if (!dev->bus) + goto err_end; + if (!dev->bus->self) + break; + dev = dev->bus->self; + } + if (sscanf(dev_name(&dev->dev), "%04x:%02x", &sbdf->seg, &sbdf->bus) != 2) + goto err_end; + return TRUE; + +err_end: + pci_free_sbdf(sbdf); + return FALSE; +} + +/* Does PCI device belong to sub tree specified by guestdev with sbdf? */ +typedef int (*pci_sbdf_match_t)(const struct guestdev *gdev, + const struct pci_dev *dev); + +static int pci_sbdf_match(const struct guestdev *gdev, + const struct pci_dev *dev) +{ + int seg, bus; + + if (sscanf(dev_name(&dev->dev), "%04x:%02x", &seg, &bus) != 2) + return FALSE; + + return gdev->u.sbdf.seg == seg && + gdev->u.sbdf.bus == bus && + gdev->u.sbdf.dev == PCI_SLOT(dev->devfn) && + gdev->u.sbdf.func == PCI_FUNC(dev->devfn); +} + +static int pci_is_in_sbdf_sub_tree(struct guestdev *gdev, struct pci_dev *dev, + pci_sbdf_match_t match) +{ + BUG_ON(!(gdev->flags & GUESTDEV_FLAG_SBDF)); + for (;;) { + if (match(gdev, dev)) + return TRUE; + if (!dev->bus || !dev->bus->self) + break; + dev = dev->bus->self; + } + return FALSE; +} + +/* Does PCI device belong to sub tree specified by guestdev parameter? 
*/ +static int __pci_is_guestdev(struct pci_dev *dev, pci_node_match_t node_match, + pci_sbdf_match_t sbdf_match) +{ + struct guestdev *gdev; + struct pcidev_sbdf pcidev_sbdf, *sbdf = NULL; + struct list_head *head; + int result = FALSE; + + if (!dev) + return FALSE; + + list_for_each(head, &guestdev_list) { + gdev = list_entry(head, struct guestdev, root_list); + switch (gdev->flags & GUESTDEV_FLAG_TYPE_MASK) { + case GUESTDEV_FLAG_DEVICEPATH: + if (sbdf == NULL) { + sbdf = &pcidev_sbdf; + memset(sbdf, 0 ,sizeof(*sbdf)); + if (!pci_get_sbdf_from_pcidev(dev, sbdf)) + goto out; + } + if (pci_is_in_devicepath_sub_tree(gdev, sbdf, + node_match)) { + result = TRUE; + goto out; + } + break; + case GUESTDEV_FLAG_SBDF: + if (pci_is_in_sbdf_sub_tree(gdev, dev, sbdf_match)) { + result = TRUE; + goto out; + } + break; + default: + BUG(); + } + } +out: + if (sbdf) + pci_free_sbdf(sbdf); + return result; +} + +int pci_is_guestdev(struct pci_dev *dev) +{ + return __pci_is_guestdev(dev, pci_node_match, pci_sbdf_match); +} +EXPORT_SYMBOL_GPL(pci_is_guestdev); + +static int reassign_resources; + +static int __init pci_set_reassign_resources(char *str) +{ + reassign_resources = 1; + + return 1; +} +__setup("reassign_resources", pci_set_reassign_resources); + +int pci_is_guestdev_to_reassign(struct pci_dev *dev) +{ + if (reassign_resources) + return pci_is_guestdev(dev); + return FALSE; +} + +#ifdef CONFIG_PCI_IOMULTI +static int pci_iomul_node_match(const struct devicepath_node *gdev_node, + const struct pcidev_sbdf_node *sbdf_node, + int options) +{ + return (options & GUESTDEV_OPT_IOMUL) && + ((gdev_node->child != NULL && + sbdf_node->child != NULL && + gdev_node->dev == sbdf_node->dev && + gdev_node->func == sbdf_node->func) || + (gdev_node->child == NULL && + sbdf_node->child == NULL && + gdev_node->dev == sbdf_node->dev)); +} + +static int pci_iomul_sbdf_match(const struct guestdev *gdev, + const struct pci_dev *dev) +{ + int seg, bus; + + if (sscanf(dev_name(&dev->dev), "%04x:%02x", &seg, &bus) != 2) + return FALSE; + + return (gdev->options & GUESTDEV_OPT_IOMUL) && + gdev->u.sbdf.seg == seg && + gdev->u.sbdf.bus == bus && + gdev->u.sbdf.dev == PCI_SLOT(dev->devfn); +} + +int pci_is_iomuldev(struct pci_dev *dev) +{ + return __pci_is_guestdev(dev, + pci_iomul_node_match, pci_iomul_sbdf_match); +} +#endif /* CONFIG_PCI_IOMULTI */ + +/* Check whether the devicepath exists under the pci root bus */ +static int __init pci_check_devicepath_exists( + struct guestdev *gdev, struct pci_bus *bus) +{ + struct devicepath_node *node; + struct pci_dev *dev; + + BUG_ON(!(gdev->flags & GUESTDEV_FLAG_DEVICEPATH)); + + node = gdev->u.devicepath.child; + while (node) { + if (!bus) + return FALSE; + dev = pci_get_slot(bus, PCI_DEVFN(node->dev, node->func)); + if (!dev) + return FALSE; + bus = dev->subordinate; + node = node->child; + pci_dev_put(dev); + } + return TRUE; +} + +/* Check whether the guestdev exists in the PCI device tree */ +static int __init pci_check_guestdev_exists(void) +{ + struct list_head *head; + struct guestdev *gdev; + int seg, bbn; + struct pci_bus *bus; + struct pci_dev *dev; + char path_str[GUESTDEV_STR_MAX]; + + list_for_each(head, &guestdev_list) { + gdev = list_entry(head, struct guestdev, root_list); + switch (gdev->flags & GUESTDEV_FLAG_TYPE_MASK) { + case GUESTDEV_FLAG_DEVICEPATH: + if (gdev->u.devicepath.seg == INVALID_SEG || + gdev->u.devicepath.bbn == INVALID_BBN) { + if (acpi_pci_get_root_seg_bbn( + gdev->u.devicepath.hid, + gdev->u.devicepath.uid, &seg, &bbn)) { + 
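+				/* Cache the segment/bus resolved via ACPI so
+				 * the pci_find_bus() lookup below can use
+				 * them directly. */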
gdev->u.devicepath.seg = seg; + gdev->u.devicepath.bbn = bbn; + } else { + pci_make_guestdev_str(gdev, + path_str, GUESTDEV_STR_MAX); + printk(KERN_INFO + "PCI: Device does not exist. %s\n", + path_str); + continue; + } + } + + bus = pci_find_bus(gdev->u.devicepath.seg, + gdev->u.devicepath.bbn); + if (!bus || + !pci_check_devicepath_exists(gdev, bus)) { + pci_make_guestdev_str(gdev, path_str, + GUESTDEV_STR_MAX); + printk(KERN_INFO + "PCI: Device does not exist. %s\n", + path_str); + } + break; + case GUESTDEV_FLAG_SBDF: + bus = pci_find_bus(gdev->u.sbdf.seg, gdev->u.sbdf.bus); + if (bus) { + dev = pci_get_slot(bus, + PCI_DEVFN(gdev->u.sbdf.dev, + gdev->u.sbdf.func)); + if (dev) { + pci_dev_put(dev); + continue; + } + } + pci_make_guestdev_str(gdev, path_str, GUESTDEV_STR_MAX); + printk(KERN_INFO "PCI: Device does not exist. %s\n", + path_str); + break; + default: + BUG(); + } + } + return 0; +} + +fs_initcall(pci_check_guestdev_exists); + --- linux-ec2-2.6.31.orig/drivers/pci/pci.h +++ linux-ec2-2.6.31/drivers/pci/pci.h @@ -243,7 +243,6 @@ extern void pci_iov_release(struct pci_dev *dev); extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); -extern int pci_sriov_resource_alignment(struct pci_dev *dev, int resno); extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); @@ -299,16 +298,26 @@ } #endif /* CONFIG_PCI_IOV */ -static inline int pci_resource_alignment(struct pci_dev *dev, - struct resource *res) +#ifdef CONFIG_PCI_GUESTDEV +extern int pci_is_guestdev_to_reassign(struct pci_dev *dev); +extern int pci_is_iomuldev(struct pci_dev *dev); +#else +#define pci_is_iomuldev(dev) 0 +#endif + +#ifdef CONFIG_PCI_RESERVE +unsigned long pci_reserve_size_io(struct pci_bus *bus); +unsigned long pci_reserve_size_mem(struct pci_bus *bus); +#else +static inline unsigned long pci_reserve_size_io(struct pci_bus *bus) { -#ifdef CONFIG_PCI_IOV - int resno = res - dev->resource; + return 0; +} - if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) - return pci_sriov_resource_alignment(dev, resno); -#endif - return resource_alignment(res); +static inline unsigned long pci_reserve_size_mem(struct pci_bus *bus) +{ + return 0; } +#endif /* CONFIG_PCI_RESERVE */ #endif /* DRIVERS_PCI_H */ --- linux-ec2-2.6.31.orig/drivers/pci/iov.c +++ linux-ec2-2.6.31/drivers/pci/iov.c @@ -598,29 +598,6 @@ } /** - * pci_sriov_resource_alignment - get resource alignment for VF BAR - * @dev: the PCI device - * @resno: the resource number - * - * Returns the alignment of the VF BAR found in the SR-IOV capability. - * This is not the same as the resource size which is defined as - * the VF BAR size multiplied by the number of VFs. The alignment - * is just the VF BAR size. - */ -int pci_sriov_resource_alignment(struct pci_dev *dev, int resno) -{ - struct resource tmp; - enum pci_bar_type type; - int reg = pci_iov_resource_bar(dev, resno, &type); - - if (!reg) - return 0; - - __pci_read_base(dev, type, &tmp, reg); - return resource_alignment(&tmp); -} - -/** * pci_restore_iov_state - restore the state of the IOV capability * @dev: the PCI device */ --- linux-ec2-2.6.31.orig/drivers/pci/Kconfig +++ linux-ec2-2.6.31/drivers/pci/Kconfig @@ -42,6 +42,27 @@ When in doubt, say N. +config PCI_GUESTDEV + bool "PCI Device Reservation for Passthrough" + depends on PCI && ACPI && XEN + default y + help + Say Y here if you want to reserve PCI device for passthrough. 
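For illustration, the boot parameter that this reservation machinery consumes (parsed in guestdev.c above) takes forms like the following, with hypothetical path and address values:

	guestdev=PNP0A08:0-02.0-00.0,0000:0a:1f.7+iomul

The first entry names a device by its ACPI root (hid:uid) and a bridge/device path; the second reserves function 0a:1f.7 in segment 0 and enables I/O multiplexing for it.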
+ +config PCI_IOMULTI + bool "PCI Device IO Multiplex for Passthrough" + depends on PCI && ACPI && XEN + default y + help + Say Y here if you need io multiplexing. + +config PCI_RESERVE + bool "PCI IO/MEMORY space reserve" + depends on PCI && XEN_PRIVILEGED_GUEST + default y + help + Say Y here if you need PCI IO/MEMORY space reserve + config PCI_STUB tristate "PCI Stub driver" depends on PCI @@ -54,7 +75,7 @@ config HT_IRQ bool "Interrupts on hypertransport devices" default y - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN help This allows native hypertransport devices to use interrupts. --- linux-ec2-2.6.31.orig/drivers/pci/setup-bus.c +++ linux-ec2-2.6.31/drivers/pci/setup-bus.c @@ -341,7 +341,7 @@ #if defined(CONFIG_ISA) || defined(CONFIG_EISA) size = (size & 0xff) + ((size & ~0xffUL) << 2); #endif - size = ALIGN(size + size1, 4096); + size = ALIGN(max(size + size1, pci_reserve_size_io(bus)), 4096); if (!size) { b_res->flags = 0; return; @@ -384,7 +384,7 @@ continue; r_size = resource_size(r); /* For bridges size != alignment */ - align = pci_resource_alignment(dev, r); + align = resource_alignment(r); order = __ffs(align) - 20; if (order > 11) { dev_warn(&dev->dev, "BAR %d bad alignment %llx: " @@ -418,7 +418,8 @@ min_align = align1 >> 1; align += aligns[order]; } - size = ALIGN(size, min_align); + size = ALIGN(max(size, (resource_size_t)pci_reserve_size_mem(bus)), + min_align); if (!size) { b_res->flags = 0; return 1; --- linux-ec2-2.6.31.orig/drivers/pci/iomulti.c +++ linux-ec2-2.6.31/drivers/pci/iomulti.c @@ -0,0 +1,1415 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "pci.h" +#include "iomulti.h" + +#define PCI_NUM_BARS 6 +#define PCI_BUS_MAX 255 +#define PCI_DEV_MAX 31 +#define PCI_FUNC_MAX 7 +#define PCI_NUM_FUNC 8 + +/* see pci_resource_len */ +static inline resource_size_t pci_iomul_len(const struct resource* r) +{ + if (r->start == 0 && r->start == r->end) + return 0; + return r->end - r->start + 1; +} + +#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1)) +/* stolen from pbus_size_io() */ +static unsigned long pdev_size_io(struct pci_dev *pdev) +{ + unsigned long size = 0, size1 = 0; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &pdev->resource[i]; + unsigned long r_size; + + if (!(r->flags & IORESOURCE_IO)) + continue; + + r_size = r->end - r->start + 1; + + if (r_size < 0x400) + /* Might be re-aligned for ISA */ + size += r_size; + else + size1 += r_size; + } + +/* To be fixed in 2.5: we should have sort of HAVE_ISA + flag in the struct pci_bus. 
 */
+#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
+	size = (size & 0xff) + ((size & ~0xffUL) << 2);
+#endif
+	size = ROUND_UP(size + size1, 4096);
+	return size;
+}
+
+/*
+ * The primary bus number of the PCI-PCI bridge in the switch on which
+ * this slot sits, i.e. the primary bus number of the PCI-PCI bridge of
+ * the downstream port or root port in the switch, which equals the
+ * secondary bus number of the PCI-PCI bridge of the upstream port in
+ * the switch.
+ */
+static inline unsigned char pci_dev_switch_busnr(struct pci_dev *pdev)
+{
+	if (pci_find_capability(pdev, PCI_CAP_ID_EXP))
+		return pdev->bus->primary;
+	return pdev->bus->number;
+}
+
+struct pci_iomul_func {
+	int segment;
+	uint8_t bus;
+	uint8_t devfn;
+
+	/* only start and end are used */
+	unsigned long io_size;
+	uint8_t io_bar;
+	struct resource resource[PCI_NUM_BARS];
+	struct resource dummy_parent;
+};
+
+struct pci_iomul_switch {
+	struct list_head list;	/* bus_list_lock protects */
+
+	/*
+	 * This lock protects the following fields and the
+	 * pci_iomul_slot/pci_iomul_func structures below.
+	 */
+	struct mutex lock;
+	struct kref kref;
+
+	struct resource io_resource;
+	struct resource *io_region;
+	unsigned int count;
+	struct pci_dev *current_pdev;
+
+	int segment;
+	uint8_t bus;
+
+	uint32_t io_base;
+	uint32_t io_limit;
+
+	/* func which has the largest io size */
+	struct pci_iomul_func *func;
+
+	struct list_head slots;
+};
+
+struct pci_iomul_slot {
+	struct list_head sibling;
+	struct kref kref;
+	/*
+	 * busnr
+	 * when pcie, the primary busnr of the PCI-PCI bridge on which
+	 * this device sits.
+	 */
+	uint8_t switch_busnr;
+	struct resource dummy_parent[PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES];
+
+	/* device */
+	int segment;
+	uint8_t bus;
+	uint8_t dev;
+
+	struct pci_iomul_func *func[PCI_NUM_FUNC];
+};
+
+static LIST_HEAD(switch_list);
+static DEFINE_MUTEX(switch_list_lock);
+
+/*****************************************************************************/
+static inline int pci_iomul_switch_io_allocated(
+	const struct pci_iomul_switch *sw)
+{
+	return !(sw->io_base == 0 || sw->io_base > sw->io_limit);
+}
+
+static struct pci_iomul_switch *pci_iomul_find_switch_locked(int segment,
+							     uint8_t bus)
+{
+	struct pci_iomul_switch *sw;
+
+	BUG_ON(!mutex_is_locked(&switch_list_lock));
+	list_for_each_entry(sw, &switch_list, list) {
+		if (sw->segment == segment && sw->bus == bus)
+			return sw;
+	}
+	return NULL;
+}
+
+static struct pci_iomul_slot *pci_iomul_find_slot_locked(
+	struct pci_iomul_switch *sw, uint8_t busnr, uint8_t dev)
+{
+	struct pci_iomul_slot *slot;
+
+	BUG_ON(!mutex_is_locked(&sw->lock));
+	list_for_each_entry(slot, &sw->slots, sibling) {
+		if (slot->bus == busnr && slot->dev == dev)
+			return slot;
+	}
+	return NULL;
+}
+
+static void pci_iomul_switch_get(struct pci_iomul_switch *sw);
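The lookup helper defined just below hands back a referenced switch with its mutex held. A minimal usage sketch under those rules (caller context and error handling are illustrative only):

	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot;

	pci_iomul_get_lock_switch(pdev, &sw, &slot);
	if (sw == NULL || slot == NULL)
		return;		/* device is not under io multiplexing */
	/* ... use the slot with sw->lock held ... */
	mutex_unlock(&sw->lock);
	pci_iomul_switch_put(sw);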
+/*
+ * On successful exit, sw->lock is held so the slot can be used, and
+ * the reference count of sw has been incremented.
+ */
+static void pci_iomul_get_lock_switch(struct pci_dev *pdev,
+				      struct pci_iomul_switch **swp,
+				      struct pci_iomul_slot **slot)
+{
+	mutex_lock(&switch_list_lock);
+
+	*swp = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
+					    pci_dev_switch_busnr(pdev));
+	if (*swp == NULL) {
+		*slot = NULL;
+		goto out;
+	}
+
+	mutex_lock(&(*swp)->lock);
+	*slot = pci_iomul_find_slot_locked(*swp, pdev->bus->number,
+					   PCI_SLOT(pdev->devfn));
+	if (*slot == NULL) {
+		mutex_unlock(&(*swp)->lock);
+		*swp = NULL;
+	} else {
+		pci_iomul_switch_get(*swp);
+	}
+out:
+	mutex_unlock(&switch_list_lock);
+}
+
+static struct pci_iomul_switch *pci_iomul_switch_alloc(int segment,
+						       uint8_t bus)
+{
+	struct pci_iomul_switch *sw;
+
+	BUG_ON(!mutex_is_locked(&switch_list_lock));
+
+	sw = kmalloc(sizeof(*sw), GFP_KERNEL);
+	if (sw == NULL)
+		return NULL;
+
+	mutex_init(&sw->lock);
+	kref_init(&sw->kref);
+	sw->io_region = NULL;
+	sw->count = 0;
+	sw->current_pdev = NULL;
+	sw->segment = segment;
+	sw->bus = bus;
+	sw->io_base = 0;
+	sw->io_limit = 0;
+	sw->func = NULL;
+	INIT_LIST_HEAD(&sw->slots);
+
+	return sw;
+}
+
+static void pci_iomul_switch_add_locked(struct pci_iomul_switch *sw)
+{
+	BUG_ON(!mutex_is_locked(&switch_list_lock));
+	list_add(&sw->list, &switch_list);
+}
+
+#ifdef CONFIG_HOTPLUG_PCI
+static void pci_iomul_switch_del_locked(struct pci_iomul_switch *sw)
+{
+	BUG_ON(!mutex_is_locked(&switch_list_lock));
+	list_del(&sw->list);
+}
+#endif
+
+static void pci_iomul_switch_get(struct pci_iomul_switch *sw)
+{
+	kref_get(&sw->kref);
+}
+
+static void pci_iomul_switch_release(struct kref *kref)
+{
+	struct pci_iomul_switch *sw = container_of(kref,
+						   struct pci_iomul_switch,
+						   kref);
+	kfree(sw);
+}
+
+static void pci_iomul_switch_put(struct pci_iomul_switch *sw)
+{
+	kref_put(&sw->kref, &pci_iomul_switch_release);
+}
+
+static int __devinit pci_iomul_slot_init(struct pci_dev *pdev,
+					 struct pci_iomul_slot *slot)
+{
+	u16 rpcap;
+	u16 cap;
+
+	rpcap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	if (!rpcap) {
+		/* plain PCI devices aren't supported */
+		printk(KERN_INFO
+		       "PCI: sharing the io port of non-PCIe device %s "
+		       "isn't supported. ignoring.\n",
+		       pci_name(pdev));
+		return -ENOSYS;
+	}
+
+	pci_read_config_word(pdev, rpcap + PCI_CAP_FLAGS, &cap);
+	switch ((cap & PCI_EXP_FLAGS_TYPE) >> 4) {
+	case PCI_EXP_TYPE_RC_END:
+		printk(KERN_INFO
+		       "PCI: io port sharing of root complex integrated "
+		       "endpoint %s isn't supported. ignoring.\n",
+		       pci_name(pdev));
+		return -ENOSYS;
+	case PCI_EXP_TYPE_ENDPOINT:
+	case PCI_EXP_TYPE_LEG_END:
+		break;
ignoring.\n", + pci_name(pdev)); + return -EINVAL; + } + + kref_init(&slot->kref); + slot->switch_busnr = pci_dev_switch_busnr(pdev); + slot->segment = pci_domain_nr(pdev->bus); + slot->bus = pdev->bus->number; + slot->dev = PCI_SLOT(pdev->devfn); + + return 0; +} + +static struct pci_iomul_slot *__devinit +pci_iomul_slot_alloc(struct pci_dev *pdev) +{ + struct pci_iomul_slot *slot; + + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (slot == NULL) + return NULL; + + if (pci_iomul_slot_init(pdev, slot) != 0) { + kfree(slot); + return NULL; + } + return slot; +} + +static void pci_iomul_slot_add_locked(struct pci_iomul_switch *sw, + struct pci_iomul_slot *slot) +{ + BUG_ON(!mutex_is_locked(&sw->lock)); + list_add(&slot->sibling, &sw->slots); +} + +#ifdef CONFIG_HOTPLUG_PCI +static void pci_iomul_slot_del_locked(struct pci_iomul_switch *sw, + struct pci_iomul_slot *slot) +{ + BUG_ON(!mutex_is_locked(&sw->lock)); + list_del(&slot->sibling); +} +#endif + +static void pci_iomul_slot_get(struct pci_iomul_slot *slot) +{ + kref_get(&slot->kref); +} + +static void pci_iomul_slot_release(struct kref *kref) +{ + struct pci_iomul_slot *slot = container_of(kref, struct pci_iomul_slot, + kref); + kfree(slot); +} + +static void pci_iomul_slot_put(struct pci_iomul_slot *slot) +{ + kref_put(&slot->kref, &pci_iomul_slot_release); +} + +/*****************************************************************************/ +static int pci_get_sbd(const char *str, + int *segment__, uint8_t *bus__, uint8_t *dev__) +{ + int segment; + int bus; + int dev; + + if (sscanf(str, "%x:%x:%x", &segment, &bus, &dev) != 3) { + if (sscanf(str, "%x:%x", &bus, &dev) == 2) + segment = 0; + else + return -EINVAL; + } + + if (segment < 0 || INT_MAX <= segment) + return -EINVAL; + if (bus < 0 || PCI_BUS_MAX < bus) + return -EINVAL; + if (dev < 0 || PCI_DEV_MAX < dev) + return -EINVAL; + + *segment__ = segment; + *bus__ = bus; + *dev__ = dev; + return 0; +} + +static char iomul_param[COMMAND_LINE_SIZE]; +#define TOKEN_MAX 10 /* SSSS:BB:DD length is 10 */ +static int pci_is_iomul_dev_param(struct pci_dev *pdev) +{ + int len; + char *p; + char *next_str; + + for (p = &iomul_param[0]; *p != '\0'; p = next_str + 1) { + next_str = strchr(p, ','); + if (next_str != NULL) + len = next_str - p; + else + len = strlen(p); + + if (len > 0 && len <= TOKEN_MAX) { + char tmp[TOKEN_MAX+1]; + int seg; + uint8_t bus; + uint8_t dev; + + strlcpy(tmp, p, len); + if (pci_get_sbd(tmp, &seg, &bus, &dev) == 0 && + pci_domain_nr(pdev->bus) == seg && + pdev->bus->number == bus && + PCI_SLOT(pdev->devfn) == dev) + return 1; + } + if (next_str == NULL) + break; + } + + /* check guestcev=+iomul option */ + return pci_is_iomuldev(pdev); +} + +/* + * Format: [:]:[,[:]:[,...] + */ +static int __init pci_iomul_param_setup(char *str) +{ + if (strlen(str) >= COMMAND_LINE_SIZE) + return 0; + + /* parse it after pci bus scanning */ + strlcpy(iomul_param, str, sizeof(iomul_param)); + return 1; +} +__setup("guestiomuldev=", pci_iomul_param_setup); + +/*****************************************************************************/ +static void __devinit pci_iomul_set_bridge_io_window(struct pci_dev *bridge, + uint32_t io_base, + uint32_t io_limit) +{ + uint16_t l; + uint32_t upper16; + + io_base >>= 12; + io_base <<= 4; + io_limit >>= 12; + io_limit <<= 4; + l = (io_base & 0xff) | ((io_limit & 0xff) << 8); + upper16 = ((io_base & 0xffff00) >> 8) | + (((io_limit & 0xffff00) >> 8) << 16); + + /* Temporarily disable the I/O range before updating PCI_IO_BASE. 
*/ + pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff); + /* Update lower 16 bits of I/O base/limit. */ + pci_write_config_word(bridge, PCI_IO_BASE, l); + /* Update upper 16 bits of I/O base/limit. */ + pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, upper16); +} + +static void __devinit pci_disable_bridge_io_window(struct pci_dev *bridge) +{ + /* set base = 0xffffff limit = 0x0 */ + pci_iomul_set_bridge_io_window(bridge, 0xffffff, 0); +} + +static int __devinit pci_iomul_func_scan(struct pci_dev *pdev, + struct pci_iomul_slot *slot, + uint8_t func) +{ + struct pci_iomul_func *f; + unsigned int i; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (f == NULL) + return -ENOMEM; + + f->segment = slot->segment; + f->bus = slot->bus; + f->devfn = PCI_DEVFN(slot->dev, func); + f->io_size = pdev_size_io(pdev); + + for (i = 0; i < PCI_NUM_BARS; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_IO)) + continue; + if (pci_resource_len(pdev, i) == 0) + continue; + + f->io_bar |= 1 << i; + f->resource[i] = pdev->resource[i]; + } + + if (f->io_bar) + slot->func[func] = f; + else + kfree(f); + return 0; +} + +/* + * This is tricky part. + * fake PCI resource assignment routines by setting flags to 0. + * PCI resource allocate routines think the resource should + * be allocated by checking flags. 0 means this resource isn't used. + * See pbus_size_io() and pdev_sort_resources(). + * + * After allocated resources, flags (IORESOURCE_IO) is exported + * to other part including user process. + * So we have to set flags to IORESOURCE_IO, but at the same time + * we must prevent those resources from reassigning when pci hot plug. + * To achieve that, set r->parent to dummy resource. + */ +static void __devinit pci_iomul_disable_resource(struct resource *r) +{ + /* don't allocate this resource */ + r->flags = 0; +} + +static void __devinit pci_iomul_reenable_resource( + struct resource *dummy_parent, struct resource *r) +{ + int ret; + + dummy_parent->start = r->start; + dummy_parent->end = r->end; + dummy_parent->flags = r->flags; + dummy_parent->name = "PCI IOMUL dummy resource"; + + ret = request_resource(dummy_parent, r); + BUG_ON(ret); +} + +static void __devinit pci_iomul_fixup_ioresource(struct pci_dev *pdev, + struct pci_iomul_func *func, + int reassign, int dealloc) +{ + uint8_t i; + struct resource *r; + + printk(KERN_INFO "PCI: deallocating io resource[%s]. 
io size 0x%lx\n", + pci_name(pdev), func->io_size); + for (i = 0; i < PCI_NUM_BARS; i++) { + r = &pdev->resource[i]; + if (!(func->io_bar & (1 << i))) + continue; + + if (reassign) { + r->end -= r->start; + r->start = 0; + pci_update_resource(pdev, i); + func->resource[i] = *r; + } + + if (dealloc) + /* don't allocate this resource */ + pci_iomul_disable_resource(r); + } + + /* parent PCI-PCI bridge */ + if (!reassign) + return; + pdev = pdev->bus->self; + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) + return; + pci_disable_bridge_io_window(pdev); + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &pdev->resource[i]; + if (!(r->flags & IORESOURCE_IO)) + continue; + + r->end -= r->start; + r->start = 0; + if (i < PCI_BRIDGE_RESOURCES) + pci_update_resource(pdev, i); + } +} + +static void __devinit __quirk_iomul_dealloc_ioresource( + struct pci_iomul_switch *sw, + struct pci_dev *pdev, struct pci_iomul_slot *slot) +{ + struct pci_iomul_func *f; + struct pci_iomul_func *__f; + + if (pci_iomul_func_scan(pdev, slot, PCI_FUNC(pdev->devfn)) != 0) + return; + + f = slot->func[PCI_FUNC(pdev->devfn)]; + if (f == NULL) + return; + + __f = sw->func; + /* sw->io_base == 0 means that we are called at boot time. + * != 0 means that we are called by php after boot. */ + if (sw->io_base == 0 && + (__f == NULL || __f->io_size < f->io_size)) { + if (__f != NULL) { + struct pci_bus *__pbus; + struct pci_dev *__pdev; + + __pbus = pci_find_bus(__f->segment, __f->bus); + BUG_ON(__pbus == NULL); + __pdev = pci_get_slot(__pbus, __f->devfn); + BUG_ON(__pdev == NULL); + pci_iomul_fixup_ioresource(__pdev, __f, 0, 1); + pci_dev_put(__pdev); + } + + pci_iomul_fixup_ioresource(pdev, f, 1, 0); + sw->func = f; + } else { + pci_iomul_fixup_ioresource(pdev, f, 1, 1); + } +} + +static void __devinit quirk_iomul_dealloc_ioresource(struct pci_dev *pdev) +{ + struct pci_iomul_switch *sw; + struct pci_iomul_slot *slot; + + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) + return; + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) + return; /* PCI Host Bridge isn't a target device */ + if (!pci_is_iomul_dev_param(pdev)) + return; + + mutex_lock(&switch_list_lock); + sw = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus), + pci_dev_switch_busnr(pdev)); + if (sw == NULL) { + sw = pci_iomul_switch_alloc(pci_domain_nr(pdev->bus), + pci_dev_switch_busnr(pdev)); + if (sw == NULL) { + mutex_unlock(&switch_list_lock); + printk(KERN_WARNING + "PCI: can't allocate memory " + "for sw of IO mulplexing %s", pci_name(pdev)); + return; + } + pci_iomul_switch_add_locked(sw); + } + pci_iomul_switch_get(sw); + mutex_unlock(&switch_list_lock); + + mutex_lock(&sw->lock); + slot = pci_iomul_find_slot_locked(sw, pdev->bus->number, + PCI_SLOT(pdev->devfn)); + if (slot == NULL) { + slot = pci_iomul_slot_alloc(pdev); + if (slot == NULL) { + mutex_unlock(&sw->lock); + pci_iomul_switch_put(sw); + printk(KERN_WARNING "PCI: can't allocate memory " + "for IO mulplexing %s", pci_name(pdev)); + return; + } + pci_iomul_slot_add_locked(sw, slot); + } + + printk(KERN_INFO "PCI: disable device and release io resource[%s].\n", + pci_name(pdev)); + pci_disable_device(pdev); + + __quirk_iomul_dealloc_ioresource(sw, pdev, slot); + + mutex_unlock(&sw->lock); + pci_iomul_switch_put(sw); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, + quirk_iomul_dealloc_ioresource); + +static void __devinit pci_iomul_read_bridge_io(struct pci_iomul_switch *sw) +{ + struct pci_iomul_func *f = sw->func; + + struct pci_bus *pbus; + struct pci_dev *pdev; + struct pci_dev *bridge; 
+
+	uint16_t l;
+	uint16_t base_upper16;
+	uint16_t limit_upper16;
+	uint32_t io_base;
+	uint32_t io_limit;
+
+	pbus = pci_find_bus(f->segment, f->bus);
+	BUG_ON(pbus == NULL);
+
+	pdev = pci_get_slot(pbus, f->devfn);
+	BUG_ON(pdev == NULL);
+
+	bridge = pdev->bus->self;
+	pci_read_config_word(bridge, PCI_IO_BASE, &l);
+	pci_read_config_word(bridge, PCI_IO_BASE_UPPER16, &base_upper16);
+	pci_read_config_word(bridge, PCI_IO_LIMIT_UPPER16, &limit_upper16);
+
+	io_base = (l & 0xf0) | ((uint32_t)base_upper16 << 8);
+	io_base <<= 8;
+	io_limit = (l >> 8) | ((uint32_t)limit_upper16 << 8);
+	io_limit <<= 8;
+	io_limit |= 0xfff;
+
+	sw->io_base = io_base;
+	sw->io_limit = io_limit;
+
+	pci_dev_put(pdev);
+	printk(KERN_INFO "PCI: bridge %s base 0x%x limit 0x%x\n",
+	       pci_name(bridge), sw->io_base, sw->io_limit);
+}
+
+static void __devinit pci_iomul_setup_bridge(struct pci_dev *bridge,
+					     uint32_t io_base,
+					     uint32_t io_limit)
+{
+	uint16_t cmd;
+
+	if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+		return;
+
+	pci_iomul_set_bridge_io_window(bridge, io_base, io_limit);
+
+	/* and forcibly enable IO */
+	pci_read_config_word(bridge, PCI_COMMAND, &cmd);
+	if (!(cmd & PCI_COMMAND_IO)) {
+		cmd |= PCI_COMMAND_IO;
+		printk(KERN_INFO "PCI: Forcibly Enabling IO %s\n",
+		       pci_name(bridge));
+		pci_write_config_word(bridge, PCI_COMMAND, cmd);
+	}
+}
+
+struct __bar {
+	unsigned long size;
+	uint8_t bar;
+};
+
+/* descending order */
+static int __devinit pci_iomul_bar_cmp(const void *lhs__, const void *rhs__)
+{
+	const struct __bar *lhs = (struct __bar*)lhs__;
+	const struct __bar *rhs = (struct __bar*)rhs__;
+	return - (lhs->size - rhs->size);
+}
+
+static void __devinit pci_iomul_setup_dev(struct pci_dev *pdev,
+					  struct pci_iomul_func *f,
+					  uint32_t io_base)
+{
+	struct __bar bars[PCI_NUM_BARS];
+	int i;
+	uint8_t num_bars = 0;
+	struct resource *r;
+
+	printk(KERN_INFO "PCI: Forcibly assigning IO %s from 0x%x\n",
+	       pci_name(pdev), io_base);
+
+	for (i = 0; i < PCI_NUM_BARS; i++) {
+		if (!(f->io_bar & (1 << i)))
+			continue;
+
+		r = &f->resource[i];
+		bars[num_bars].size = pci_iomul_len(r);
+		bars[num_bars].bar = i;
+
+		num_bars++;
+	}
+
+	sort(bars, num_bars, sizeof(bars[0]), &pci_iomul_bar_cmp, NULL);
+
+	for (i = 0; i < num_bars; i++) {
+		struct resource *fr = &f->resource[bars[i].bar];
+		r = &pdev->resource[bars[i].bar];
+
+		BUG_ON(r->start != 0);
+		r->start += io_base;
+		r->end += io_base;
+
+		fr->start = r->start;
+		fr->end = r->end;
+
+		/* pci_update_resource() checks flags. */
+		r->flags = fr->flags;
+		pci_update_resource(pdev, bars[i].bar);
+		pci_iomul_reenable_resource(&f->dummy_parent, r);
+
+		io_base += bars[i].size;
+	}
+}
+
+static void __devinit pci_iomul_release_io_resource(
+	struct pci_dev *pdev, struct pci_iomul_switch *sw,
+	struct pci_iomul_slot *slot, struct pci_iomul_func *f)
+{
+	int i;
+	struct resource *r;
+
+	for (i = 0; i < PCI_NUM_BARS; i++) {
+		if (pci_resource_flags(pdev, i) & IORESOURCE_IO &&
+		    pdev->resource[i].parent != NULL) {
+			r = &pdev->resource[i];
+			f->resource[i] = *r;
+			release_resource(r);
+			pci_iomul_reenable_resource(&f->dummy_parent, r);
+		}
+	}
+
+	/* parent PCI-PCI bridge */
+	pdev = pdev->bus->self;
+	if ((pdev->class >> 8) != PCI_CLASS_BRIDGE_HOST) {
+		for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+			struct resource *parent = pdev->resource[i].parent;
+
+			if (pci_resource_flags(pdev, i) & IORESOURCE_IO &&
+			    parent != NULL) {
+				r = &pdev->resource[i];
+
+				sw->io_resource.flags = r->flags;
+				sw->io_resource.start = sw->io_base;
+				sw->io_resource.end = sw->io_limit;
+				sw->io_resource.name = "PCI IO Multiplexer";
+
+				release_resource(r);
+				pci_iomul_reenable_resource(
+					&slot->dummy_parent[i - PCI_BRIDGE_RESOURCES], r);
+
+				if (request_resource(parent,
+						     &sw->io_resource))
+					printk(KERN_ERR
+					       "PCI IOMul: can't allocate "
+					       "resource. [0x%x, 0x%x]\n",
+					       sw->io_base, sw->io_limit);
+			}
+		}
+	}
+}
+
+static void __devinit quirk_iomul_reassign_ioresource(struct pci_dev *pdev)
+{
+	struct pci_iomul_switch *sw;
+	struct pci_iomul_slot *slot;
+	struct pci_iomul_func *sf;
+	struct pci_iomul_func *f;
+
+	pci_iomul_get_lock_switch(pdev, &sw, &slot);
+	if (sw == NULL || slot == NULL)
+		return;
+
+	if (sw->io_base == 0)
+		pci_iomul_read_bridge_io(sw);
+	if (!pci_iomul_switch_io_allocated(sw))
+		goto out;
+
+	sf = sw->func;
+	f = slot->func[PCI_FUNC(pdev->devfn)];
+	if (f == NULL)
+		/* (sf == NULL || f == NULL) case
+		 * can happen when all the specified devices
+		 * don't have io space
+		 */
+		goto out;
+
+	if (sf != NULL &&
+	    (pci_domain_nr(pdev->bus) != sf->segment ||
+	     pdev->bus->number != sf->bus ||
+	     PCI_SLOT(pdev->devfn) != PCI_SLOT(sf->devfn)) &&
+	    PCI_FUNC(pdev->devfn) == 0) {
+		pci_iomul_setup_bridge(pdev->bus->self,
+				       sw->io_base, sw->io_limit);
+	}
+
+	BUG_ON(f->io_size > sw->io_limit - sw->io_base + 1);
+	if (/* f == sf */
+	    sf != NULL &&
+	    pci_domain_nr(pdev->bus) == sf->segment &&
+	    pdev->bus->number == sf->bus &&
+	    pdev->devfn == sf->devfn)
+		pci_iomul_release_io_resource(pdev, sw, slot, f);
+	else
+		pci_iomul_setup_dev(pdev, f, sw->io_base);
+
+out:
+	mutex_unlock(&sw->lock);
+	pci_iomul_switch_put(sw);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+			quirk_iomul_reassign_ioresource);
+
+/*****************************************************************************/
+#ifdef CONFIG_HOTPLUG_PCI
+static int __devinit __pci_iomul_notifier_del_device(struct pci_dev *pdev)
+{
+	struct pci_iomul_switch *sw;
+	struct pci_iomul_slot *slot;
+	int i;
+
+	pci_iomul_get_lock_switch(pdev, &sw, &slot);
+	if (sw == NULL || slot == NULL)
+		return 0;
+
+	if (sw->func == slot->func[PCI_FUNC(pdev->devfn)])
+		sw->func = NULL;
+	kfree(slot->func[PCI_FUNC(pdev->devfn)]);
+	slot->func[PCI_FUNC(pdev->devfn)] = NULL;
+	for (i = 0; i < PCI_NUM_FUNC; i++) {
+		if (slot->func[i] != NULL)
+			goto out;
+	}
+
+	pci_iomul_slot_del_locked(sw, slot);
+	pci_iomul_slot_put(slot);
+
+out:
+	mutex_unlock(&sw->lock);
+	pci_iomul_switch_put(sw);
+	return 0;
+}
+
+static int __devinit __pci_iomul_notifier_del_switch(struct pci_dev *pdev)
+{
+	struct pci_iomul_switch *sw;
+
+	mutex_lock(&switch_list_lock);
+	sw = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
+					  pdev->bus->number);
+	if (sw == NULL)
+		goto out;
+
+	pci_iomul_switch_del_locked(sw);
+
+	mutex_lock(&sw->lock);
+	if (sw->io_resource.parent)
+		release_resource(&sw->io_resource);
+	sw->io_base = 0;	/* mark this switch as removed */
+	sw->io_limit = 0;
+	BUG_ON(!list_empty(&sw->slots));
+	mutex_unlock(&sw->lock);
+
+	/* only drop the reference when a switch was actually found */
+	pci_iomul_switch_put(sw);
+
+out:
+	mutex_unlock(&switch_list_lock);
+	return 0;
+}
+
+static int __devinit pci_iomul_notifier_del_device(struct pci_dev *pdev)
+{
+	int ret;
+	switch (pdev->hdr_type) {
+	case PCI_HEADER_TYPE_NORMAL:
+		ret = __pci_iomul_notifier_del_device(pdev);
+		break;
+	case PCI_HEADER_TYPE_BRIDGE:
+		ret = __pci_iomul_notifier_del_switch(pdev);
+		break;
+	default:
+		printk(KERN_WARNING "PCI IOMUL: "
+		       "device %s has unknown header type %02x, ignoring.\n",
+		       pci_name(pdev), pdev->hdr_type);
+		ret = -EIO;
+		break;
+	}
+	return ret;
+}
+
+static int __devinit pci_iomul_notifier(struct notifier_block *nb,
+					unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		quirk_iomul_reassign_ioresource(pdev);
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		return pci_iomul_notifier_del_device(pdev);
+	default:
+		/* nothing */
+		break;
+	}
+
+	return 0;
+}
+
+static struct notifier_block pci_iomul_nb = {
+	.notifier_call = pci_iomul_notifier,
+};
+
+static int __init pci_iomul_hotplug_init(void)
+{
+	bus_register_notifier(&pci_bus_type, &pci_iomul_nb);
+	return 0;
+}
+
+late_initcall(pci_iomul_hotplug_init);
+#endif
+
+/*****************************************************************************/
+struct pci_iomul_data {
+	struct mutex lock;
+
+	struct pci_dev *pdev;
+	struct pci_iomul_switch *sw;
+	struct pci_iomul_slot *slot;	/* slot::kref */
+	struct pci_iomul_func **func;	/* when dereferencing,
+					   sw->lock is necessary */
+};
+
+static int pci_iomul_func_ioport(struct pci_iomul_func *func,
+				 uint8_t bar, uint64_t offset, int *port)
+{
+	if (!(func->io_bar & (1 << bar)))
+		return -EINVAL;
+
+	*port = func->resource[bar].start + offset;
+	if (*port < func->resource[bar].start ||
+	    *port > func->resource[bar].end)
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline int pci_iomul_valid(struct pci_iomul_data *iomul)
+{
+	BUG_ON(!mutex_is_locked(&iomul->lock));
+	BUG_ON(!mutex_is_locked(&iomul->sw->lock));
+	return pci_iomul_switch_io_allocated(iomul->sw) &&
+		*iomul->func != NULL;
+}
+
+static void __pci_iomul_enable_io(struct pci_dev *pdev)
+{
+	uint16_t cmd;
+
+	pci_dev_get(pdev);
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	cmd |= PCI_COMMAND_IO;
+	pci_write_config_word(pdev, PCI_COMMAND, cmd);
+}
+
+static void __pci_iomul_disable_io(struct pci_iomul_data *iomul,
+				   struct pci_dev *pdev)
+{
+	uint16_t cmd;
+
+	if (!pci_iomul_valid(iomul))
+		return;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	cmd &= ~PCI_COMMAND_IO;
+	pci_write_config_word(pdev, PCI_COMMAND, cmd);
+	pci_dev_put(pdev);
+}
+
+static int pci_iomul_open(struct inode *inode, struct file *filp)
+{
+	struct pci_iomul_data *iomul;
+	iomul = kmalloc(sizeof(*iomul), GFP_KERNEL);
+	if (iomul == NULL)
+		return -ENOMEM;
+
+	mutex_init(&iomul->lock);
+	iomul->pdev = NULL;
+	iomul->sw = NULL;
+	iomul->slot = NULL;
+	iomul->func = NULL;
+	filp->private_data = (void*)iomul;
+
+	return 0;
+}
+
+static int pci_iomul_release(struct inode *inode, struct file *filp)
+{
+	struct pci_iomul_data *iomul =
+		(struct pci_iomul_data*)filp->private_data;
+	struct pci_iomul_switch *sw;
+	struct pci_iomul_slot *slot = NULL;
+
+	mutex_lock(&iomul->lock);
+	sw = iomul->sw;
+	slot = iomul->slot;
+	if (iomul->pdev != NULL) {
+		if (sw != NULL) {
+			mutex_lock(&sw->lock);
+			if (sw->current_pdev == iomul->pdev) {
+				__pci_iomul_disable_io(iomul,
+						       sw->current_pdev);
+				sw->current_pdev = NULL;
+			}
+			sw->count--;
+			if (sw->count == 0) {
+				release_region(sw->io_region->start,
+					       sw->io_region->end -
+					       sw->io_region->start + 1);
+				sw->io_region = NULL;
+			}
+			mutex_unlock(&sw->lock);
+		}
+		pci_dev_put(iomul->pdev);
+	}
+	mutex_unlock(&iomul->lock);
+
+	if (slot != NULL)
+		pci_iomul_slot_put(slot);
+	if (sw != NULL)
+		pci_iomul_switch_put(sw);
+	kfree(iomul);
+	return 0;
+}
+
+static long pci_iomul_setup(struct pci_iomul_data *iomul,
+			    struct pci_iomul_setup __user *arg)
+{
+	long error = 0;
+	struct pci_iomul_setup setup;
+	struct pci_iomul_switch *sw = NULL;
+	struct pci_iomul_slot *slot;
+	struct pci_bus *pbus;
+	struct pci_dev *pdev;
+
+	if (copy_from_user(&setup, arg, sizeof(setup)))
+		return -EFAULT;
+
+	pbus = pci_find_bus(setup.segment, setup.bus);
+	if (pbus == NULL)
+		return -ENODEV;
+	pdev = pci_get_slot(pbus, setup.dev);
+	if (pdev == NULL)
+		return -ENODEV;
+
+	mutex_lock(&iomul->lock);
+	if (iomul->sw != NULL) {
+		error = -EBUSY;
+		goto out0;
+	}
+
+	pci_iomul_get_lock_switch(pdev, &sw, &slot);
+	if (sw == NULL || slot == NULL) {
+		error = -ENODEV;
+		goto out0;
+	}
+	if (!pci_iomul_switch_io_allocated(sw)) {
+		error = -ENODEV;
+		goto out;
+	}
+
+	if (slot->func[setup.func] == NULL) {
+		error = -ENODEV;
+		goto out;
+	}
+
+	if (sw->count == 0) {
+		BUG_ON(sw->io_region != NULL);
+		sw->io_region =
+			request_region(sw->io_base,
+				       sw->io_limit - sw->io_base + 1,
+				       "PCI IO Multiplexer driver");
+		if (sw->io_region == NULL) {
+			/* sw->lock is released once, at the out label below */
+			error = -EBUSY;
+			goto out;
+		}
+	}
+	sw->count++;
+	pci_iomul_slot_get(slot);
+
+	iomul->pdev = pdev;
+	iomul->sw = sw;
+	iomul->slot = slot;
+	iomul->func = &slot->func[setup.func];
+
+out:
+	mutex_unlock(&sw->lock);
+out0:
+	mutex_unlock(&iomul->lock);
+	if (error != 0) {
+		if (sw != NULL)
+			pci_iomul_switch_put(sw);
+		pci_dev_put(pdev);
+	}
+	return error;
+}
+
+static int pci_iomul_lock(struct pci_iomul_data *iomul,
+			  struct pci_iomul_switch **sw,
+			  struct pci_iomul_func **func)
+{
+	mutex_lock(&iomul->lock);
+	*sw = iomul->sw;
+	if (*sw == NULL) {
+		mutex_unlock(&iomul->lock);
+		return -ENODEV;
+	}
+	mutex_lock(&(*sw)->lock);
+	if (!pci_iomul_valid(iomul)) {
+		mutex_unlock(&(*sw)->lock);
+		mutex_unlock(&iomul->lock);
+		return -ENODEV;
+	}
+	*func = *iomul->func;
+
+	return 0;
+}
+
+static long pci_iomul_disable_io(struct pci_iomul_data *iomul)
+{
+	long error = 0;
+	struct pci_iomul_switch *sw;
+	struct pci_iomul_func *dummy_func;
+	struct pci_dev *pdev;
+
+	if (pci_iomul_lock(iomul, &sw, &dummy_func) < 0)
+		return -ENODEV;
+
+	pdev = iomul->pdev;
+	if (pdev == NULL)
+		error = -ENODEV;
+
+	if (pdev != NULL && sw->current_pdev == pdev) {
+		__pci_iomul_disable_io(iomul, pdev);
+		sw->current_pdev = NULL;
+	}
+
+	mutex_unlock(&sw->lock);
+	mutex_unlock(&iomul->lock);
+	return error;
+}
+
+static void pci_iomul_switch_to(
+	struct pci_iomul_data *iomul, struct pci_iomul_switch *sw,
+	struct pci_dev *next_pdev)
+{
+	if (sw->current_pdev == next_pdev)
+		/* nothing to do */
+		return;
+
+	if (sw->current_pdev != NULL)
+		__pci_iomul_disable_io(iomul, sw->current_pdev);
+
+	__pci_iomul_enable_io(next_pdev);
+	sw->current_pdev = next_pdev;
+}
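+
+/*
+ * Userspace protocol, as a minimal usage sketch (illustrative only; the
+ * segment/bus/dev numbers below are made-up examples, and the target
+ * function must actually expose an IO BAR 0): a privileged process
+ * (CAP_SYS_ADMIN and CAP_SYS_RAWIO) opens /dev/pci_iomul and binds it
+ * to one function with the ioctls declared in iomulti.h:
+ *
+ *	int fd = open("/dev/pci_iomul", O_RDWR);
+ *	struct pci_iomul_setup s = {
+ *		.segment = 0, .bus = 1, .dev = 0, .func = 0,
+ *	};
+ *	ioctl(fd, PCI_IOMUL_SETUP, &s);
+ *	struct pci_iomul_in in = { .bar = 0, .offset = 0, .size = 4 };
+ *	ioctl(fd, PCI_IOMUL_IN, &in);	-> in.value holds the data read
+ *	ioctl(fd, PCI_IOMUL_DISABLE_IO);
+ *	close(fd);
+ *
+ * Each IN/OUT transparently routes the shared IO window to the bound
+ * device via pci_iomul_switch_to() below before touching the port.
+ */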
+
+static long pci_iomul_in(struct pci_iomul_data *iomul,
+			 struct pci_iomul_in __user *arg)
+{
+	struct pci_iomul_in in;
+	struct pci_iomul_switch *sw;
+	struct pci_iomul_func *func;
+
+	long error = 0;
+	int port;
+	uint32_t value = 0;
+
+	if (copy_from_user(&in, arg, sizeof(in)))
+		return -EFAULT;
+
+	if (pci_iomul_lock(iomul, &sw, &func) < 0)
+		return -ENODEV;
+
+	error = pci_iomul_func_ioport(func, in.bar, in.offset, &port);
+	if (error)
+		goto out;
+
+	pci_iomul_switch_to(iomul, sw, iomul->pdev);
+	switch (in.size) {
+	case 4:
+		value = inl(port);
+		break;
+	case 2:
+		value = inw(port);
+		break;
+	case 1:
+		value = inb(port);
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+
+out:
+	mutex_unlock(&sw->lock);
+	mutex_unlock(&iomul->lock);
+
+	if (error == 0 && put_user(value, &arg->value))
+		return -EFAULT;
+	return error;
+}
+
+static long pci_iomul_out(struct pci_iomul_data *iomul,
+			  struct pci_iomul_out __user *arg)
+{
+	struct pci_iomul_out out;
+	struct pci_iomul_switch *sw;
+	struct pci_iomul_func *func;
+
+	long error = 0;
+	int port;
+
+	if (copy_from_user(&out, arg, sizeof(out)))
+		return -EFAULT;
+
+	if (pci_iomul_lock(iomul, &sw, &func) < 0)
+		return -ENODEV;
+
+	error = pci_iomul_func_ioport(func, out.bar, out.offset, &port);
+	if (error)
+		goto out;
+
+	pci_iomul_switch_to(iomul, sw, iomul->pdev);
+	switch (out.size) {
+	case 4:
+		outl(out.value, port);
+		break;
+	case 2:
+		outw(out.value, port);
+		break;
+	case 1:
+		outb(out.value, port);
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+
+out:
+	mutex_unlock(&sw->lock);
+	mutex_unlock(&iomul->lock);
+	return error;
+}
+
+static long pci_iomul_ioctl(struct file *filp,
+			    unsigned int cmd, unsigned long arg)
+{
+	long error;
+	struct pci_iomul_data *iomul =
+		(struct pci_iomul_data*)filp->private_data;
+
+	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	switch (cmd) {
+	case PCI_IOMUL_SETUP:
+		error = pci_iomul_setup(iomul,
+					(struct pci_iomul_setup __user *)arg);
+		break;
+	case PCI_IOMUL_DISABLE_IO:
+		error = pci_iomul_disable_io(iomul);
+		break;
+	case PCI_IOMUL_IN:
+		error = pci_iomul_in(iomul, (struct pci_iomul_in __user *)arg);
+		break;
+	case PCI_IOMUL_OUT:
+		error = pci_iomul_out(iomul,
+				      (struct pci_iomul_out __user *)arg);
+		break;
+	default:
+		error = -ENOSYS;
+		break;
+	}
+
+	return error;
+}
+
+static const struct file_operations pci_iomul_fops = {
+	.owner = THIS_MODULE,
+
+	.open = pci_iomul_open, /* nonseekable_open */
+	.release = pci_iomul_release,
+
+	.unlocked_ioctl = pci_iomul_ioctl,
+};
+
+static struct miscdevice pci_iomul_miscdev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "pci_iomul",
+	.fops = &pci_iomul_fops,
+};
+
+static int pci_iomul_init(void)
+{
+	int error;
+	error = misc_register(&pci_iomul_miscdev);
+	if (error != 0) {
+		printk(KERN_ALERT "Couldn't register /dev/misc/pci_iomul\n");
+		return error;
+	}
+	printk(KERN_INFO "PCI IO multiplexer device installed.\n");
+	return 0;
+}
+
+#if 0
+static void pci_iomul_cleanup(void)
+{
+	misc_deregister(&pci_iomul_miscdev);
+}
+#endif
+
+/*
+ * This must be called after pci fixup final which is called by
+ * device_initcall(pci_init).
+ */ +late_initcall(pci_iomul_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Isaku Yamahata "); +MODULE_DESCRIPTION("PCI IO space multiplexing driver"); --- linux-ec2-2.6.31.orig/drivers/pci/Makefile +++ linux-ec2-2.6.31/drivers/pci/Makefile @@ -7,6 +7,9 @@ irq.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSFS) += slot.o +obj-$(CONFIG_PCI_GUESTDEV) += guestdev.o +obj-$(CONFIG_PCI_IOMULTI) += iomulti.o +obj-$(CONFIG_PCI_RESERVE) += reserve.o # Build PCI Express stuff if needed obj-$(CONFIG_PCIEPORTBUS) += pcie/ --- linux-ec2-2.6.31.orig/drivers/pci/dmar.c +++ linux-ec2-2.6.31/drivers/pci/dmar.c @@ -632,20 +632,31 @@ iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { + /* Promote an attitude of violence to a BIOS engineer today */ + WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->reg_base_addr, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + goto err_unmap; + } + #ifdef CONFIG_DMAR agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR "Cannot get a valid agaw for iommu (seq_id = %d)\n", iommu->seq_id); - goto error; + goto err_unmap; } msagaw = iommu_calculate_max_sagaw(iommu); if (msagaw < 0) { printk(KERN_ERR "Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); - goto error; + goto err_unmap; } #endif iommu->agaw = agaw; @@ -665,7 +676,7 @@ } ver = readl(iommu->reg + DMAR_VER_REG); - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", (unsigned long long)drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), (unsigned long long)iommu->cap, @@ -675,7 +686,10 @@ drhd->iommu = iommu; return 0; -error: + + err_unmap: + iounmap(iommu->reg); + error: kfree(iommu); return -1; } --- linux-ec2-2.6.31.orig/drivers/pci/iomulti.h +++ linux-ec2-2.6.31/drivers/pci/iomulti.h @@ -0,0 +1,51 @@ +#ifndef PCI_IOMULTI_H +#define PCI_IOMULTI_H +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. 
+ * + */ + +struct pci_iomul_setup { + uint16_t segment; + uint8_t bus; + uint8_t dev; + uint8_t func; +}; + +struct pci_iomul_in { + uint8_t bar; + uint64_t offset; + + uint8_t size; + uint32_t value; +}; + +struct pci_iomul_out { + uint8_t bar; + uint64_t offset; + + uint8_t size; + uint32_t value; +}; + +#define PCI_IOMUL_SETUP _IOW ('P', 0, struct pci_iomul_setup) +#define PCI_IOMUL_DISABLE_IO _IO ('P', 1) +#define PCI_IOMUL_IN _IOWR('P', 2, struct pci_iomul_in) +#define PCI_IOMUL_OUT _IOW ('P', 3, struct pci_iomul_out) + +#endif /* PCI_IOMULTI_H */ --- linux-ec2-2.6.31.orig/drivers/pci/pcie/aer/aerdrv.c +++ linux-ec2-2.6.31/drivers/pci/pcie/aer/aerdrv.c @@ -52,7 +52,7 @@ static struct pcie_port_service_driver aerdriver = { .name = "aer", - .port_type = PCIE_ANY_PORT, + .port_type = PCIE_RC_PORT, .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, --- linux-ec2-2.6.31.orig/drivers/oprofile/oprof.c +++ linux-ec2-2.6.31/drivers/oprofile/oprof.c @@ -5,6 +5,10 @@ * @remark Read the file COPYING * * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. */ #include @@ -33,6 +37,34 @@ */ static int timer = 0; +#ifdef CONFIG_XEN +int oprofile_set_active(int active_domains[], unsigned int adomains) +{ + int err; + + if (!oprofile_ops.set_active) + return -EINVAL; + + mutex_lock(&start_mutex); + err = oprofile_ops.set_active(active_domains, adomains); + mutex_unlock(&start_mutex); + return err; +} + +int oprofile_set_passive(int passive_domains[], unsigned int pdomains) +{ + int err; + + if (!oprofile_ops.set_passive) + return -EINVAL; + + mutex_lock(&start_mutex); + err = oprofile_ops.set_passive(passive_domains, pdomains); + mutex_unlock(&start_mutex); + return err; +} +#endif + int oprofile_setup(void) { int err; --- linux-ec2-2.6.31.orig/drivers/oprofile/oprof.h +++ linux-ec2-2.6.31/drivers/oprofile/oprof.h @@ -36,4 +36,7 @@ int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_active(int active_domains[], unsigned int adomains); +int oprofile_set_passive(int passive_domains[], unsigned int pdomains); + #endif /* OPROF_H */ --- linux-ec2-2.6.31.orig/drivers/oprofile/cpu_buffer.c +++ linux-ec2-2.6.31/drivers/oprofile/cpu_buffer.c @@ -8,6 +8,10 @@ * @author Barry Kasindorf * @author Robert Richter * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * * Each CPU has a local buffer that stores PC value/event * pairs. We also log context switches when we notice them. 
* Eventually each CPU's buffer is processed into the global @@ -55,6 +59,12 @@ #define DEFAULT_TIMER_EXPIRE (HZ / 10) static int work_enabled; +#ifndef CONFIG_XEN +#define current_domain COORDINATOR_DOMAIN +#else +static int32_t current_domain = COORDINATOR_DOMAIN; +#endif + unsigned long oprofile_get_cpu_buffer_size(void) { return oprofile_cpu_buffer_size; @@ -99,7 +109,7 @@ struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); b->last_task = NULL; - b->last_is_kernel = -1; + b->last_cpu_mode = -1; b->tracing = 0; b->buffer_size = buffer_size; b->sample_received = 0; @@ -217,7 +227,7 @@ static int op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace, - int is_kernel, struct task_struct *task) + int cpu_mode, struct task_struct *task) { struct op_entry entry; struct op_sample *sample; @@ -230,16 +240,15 @@ flags |= TRACE_BEGIN; /* notice a switch from user->kernel or vice versa */ - is_kernel = !!is_kernel; - if (cpu_buf->last_is_kernel != is_kernel) { - cpu_buf->last_is_kernel = is_kernel; - flags |= KERNEL_CTX_SWITCH; - if (is_kernel) - flags |= IS_KERNEL; + if (cpu_buf->last_cpu_mode != cpu_mode) { + cpu_buf->last_cpu_mode = cpu_mode; + flags |= KERNEL_CTX_SWITCH | cpu_mode; } /* notice a task switch */ - if (cpu_buf->last_task != task) { + /* if not processing other domain samples */ + if (cpu_buf->last_task != task && + current_domain == COORDINATOR_DOMAIN) { cpu_buf->last_task = task; flags |= USER_CTX_SWITCH; } @@ -288,14 +297,14 @@ /* * This must be safe from any context. * - * is_kernel is needed because on some architectures you cannot + * cpu_mode is needed because on some architectures you cannot * tell if you are in kernel or user space simply by looking at - * pc. We tag this in the buffer by generating kernel enter/exit - * events whenever is_kernel changes + * pc. We tag this in the buffer by generating kernel/user (and + * xen) enter events whenever cpu_mode changes */ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, - unsigned long backtrace, int is_kernel, unsigned long event) + unsigned long backtrace, int cpu_mode, unsigned long event) { cpu_buf->sample_received++; @@ -304,7 +313,7 @@ return 0; } - if (op_add_code(cpu_buf, backtrace, is_kernel, current)) + if (op_add_code(cpu_buf, backtrace, cpu_mode, current)) goto fail; if (op_add_sample(cpu_buf, pc, event)) @@ -420,6 +429,20 @@ log_sample(cpu_buf, pc, 0, is_kernel, event); } +#ifdef CONFIG_XEN +/* + * This is basically log_sample(b, ESCAPE_CODE, 1, cpu_mode, CPU_TRACE_BEGIN), + * as was previously accessible through oprofile_add_pc(). 
+ */ +void oprofile_add_mode(int cpu_mode) +{ + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + + if (op_add_code(cpu_buf, 1, cpu_mode, current)) + cpu_buf->sample_lost_overflow++; +} +#endif + void oprofile_add_trace(unsigned long pc) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); @@ -444,6 +467,28 @@ return; } +#ifdef CONFIG_XEN +int oprofile_add_domain_switch(int32_t domain_id) +{ + struct op_entry entry; + struct op_sample *sample; + + sample = op_cpu_buffer_write_reserve(&entry, 1); + if (!sample) + return 0; + + sample->eip = ESCAPE_CODE; + sample->event = DOMAIN_SWITCH; + + op_cpu_buffer_add_data(&entry, domain_id); + op_cpu_buffer_write_commit(&entry); + + current_domain = domain_id; + + return 1; +} +#endif + /* * This serves to avoid cpu buffer overflow, and makes sure * the task mortuary progresses --- linux-ec2-2.6.31.orig/drivers/oprofile/buffer_sync.c +++ linux-ec2-2.6.31/drivers/oprofile/buffer_sync.c @@ -8,6 +8,10 @@ * @author Barry Kasindorf * @author Robert Richter * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * * This is the core of the buffer management. Each * CPU buffer is processed and entered into the * global event buffer. Such processing is necessary @@ -42,6 +46,10 @@ static DEFINE_SPINLOCK(task_mortuary); static void process_task_mortuary(void); +#ifdef CONFIG_XEN +static int cpu_current_domain[NR_CPUS]; +#endif + /* Take ownership of the task struct and place it on the * list for processing. Only after two full buffer syncs * does the task eventually get freed, because by then @@ -60,7 +68,6 @@ return NOTIFY_OK; } - /* The task is on its way out. A sync of the buffer means we can catch * any remaining samples for this task. */ @@ -153,6 +160,13 @@ int sync_start(void) { int err; +#ifdef CONFIG_XEN + int i; + + for (i = 0; i < NR_CPUS; i++) { + cpu_current_domain[i] = COORDINATOR_DOMAIN; + } +#endif if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL)) return -ENOMEM; @@ -285,14 +299,32 @@ last_cookie = INVALID_COOKIE; } -static void add_kernel_ctx_switch(unsigned int in_kernel) +static void add_cpu_mode_switch(unsigned int cpu_mode) { add_event_entry(ESCAPE_CODE); - if (in_kernel) + switch (cpu_mode) { + case CPU_MODE_USER: + add_event_entry(USER_ENTER_SWITCH_CODE); + break; + case CPU_MODE_KERNEL: add_event_entry(KERNEL_ENTER_SWITCH_CODE); - else - add_event_entry(KERNEL_EXIT_SWITCH_CODE); + break; + case CPU_MODE_XEN: + add_event_entry(XEN_ENTER_SWITCH_CODE); + break; + default: + break; + } +} + +#ifdef CONFIG_XEN +static void add_domain_switch(unsigned long domain_id) +{ + add_event_entry(ESCAPE_CODE); + add_event_entry(DOMAIN_SWITCH_CODE); + add_event_entry(domain_id); } +#endif static void add_user_ctx_switch(struct task_struct const *task, unsigned long cookie) @@ -372,12 +404,12 @@ * for later lookup from userspace. Return 0 on failure. 
*/ static int -add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) +add_sample(struct mm_struct *mm, struct op_sample *s, int cpu_mode) { unsigned long cookie; off_t offset; - if (in_kernel) { + if (cpu_mode >= CPU_MODE_KERNEL) { add_sample_entry(s->eip, s->event); return 1; } @@ -502,7 +534,7 @@ unsigned long val; struct task_struct *new; unsigned long cookie = 0; - int in_kernel = 1; + int cpu_mode = CPU_MODE_KERNEL; sync_buffer_state state = sb_buffer_start; unsigned int i; unsigned long available; @@ -514,6 +546,13 @@ add_cpu_switch(cpu); +#ifdef CONFIG_XEN + /* We need to assign the first samples in this CPU buffer to the + same domain that we were processing at the last sync_buffer */ + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) + add_domain_switch(cpu_current_domain[cpu]); +#endif + op_cpu_buffer_reset(cpu); available = op_cpu_buffer_entries(cpu); @@ -530,10 +569,10 @@ } if (flags & KERNEL_CTX_SWITCH) { /* kernel/userspace switch */ - in_kernel = flags & IS_KERNEL; + cpu_mode = flags & CPU_MODE_MASK; if (state == sb_buffer_start) state = sb_sample_start; - add_kernel_ctx_switch(flags & IS_KERNEL); + add_cpu_mode_switch(cpu_mode); } if (flags & USER_CTX_SWITCH && op_cpu_buffer_get_data(&entry, &val)) { @@ -546,16 +585,30 @@ cookie = get_exec_dcookie(mm); add_user_ctx_switch(new, cookie); } +#ifdef CONFIG_XEN + if ((flags & DOMAIN_SWITCH) + && op_cpu_buffer_get_data(&entry, &val)) { + cpu_current_domain[cpu] = val; + add_domain_switch(val); + } +#endif if (op_cpu_buffer_get_size(&entry)) add_data(&entry, mm); continue; } +#ifdef CONFIG_XEN + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { + add_sample_entry(sample->eip, sample->event); + continue; + } +#endif + if (state < sb_bt_start) /* ignore sample */ continue; - if (add_sample(mm, sample, in_kernel)) + if (add_sample(mm, sample, cpu_mode)) continue; /* ignore backtraces if failed to add a sample */ @@ -566,6 +619,12 @@ } release_mm(mm); +#ifdef CONFIG_XEN + /* We reset domain to COORDINATOR at each CPU switch */ + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) + add_domain_switch(COORDINATOR_DOMAIN); +#endif + mark_done(cpu); mutex_unlock(&buffer_mutex); --- linux-ec2-2.6.31.orig/drivers/oprofile/oprofile_files.c +++ linux-ec2-2.6.31/drivers/oprofile/oprofile_files.c @@ -5,10 +5,16 @@ * @remark Read the file COPYING * * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
*/ #include #include +#include +#include #include "event_buffer.h" #include "oprofile_stats.h" @@ -123,6 +129,200 @@ .write = dump_write, }; +#ifdef CONFIG_XEN +#include + +#define TMPBUFSIZE 512 + +static unsigned int adomains = 0; +static int active_domains[MAX_OPROF_DOMAINS + 1]; +static DEFINE_MUTEX(adom_mutex); + +static ssize_t adomain_write(struct file * file, char const __user * buf, + size_t count, loff_t * offset) +{ + char *tmpbuf; + char *startp, *endp; + int i; + unsigned long val; + ssize_t retval = count; + + if (*offset) + return -EINVAL; + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) + return -ENOMEM; + + if (copy_from_user(tmpbuf, buf, count)) { + kfree(tmpbuf); + return -EFAULT; + } + tmpbuf[count] = 0; + + mutex_lock(&adom_mutex); + + startp = tmpbuf; + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { + val = simple_strtoul(startp, &endp, 0); + if (endp == startp) + break; + while (ispunct(*endp) || isspace(*endp)) + endp++; + active_domains[i] = val; + if (active_domains[i] != val) + /* Overflow, force error below */ + i = MAX_OPROF_DOMAINS + 1; + startp = endp; + } + /* Force error on trailing junk */ + adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; + + kfree(tmpbuf); + + if (adomains > MAX_OPROF_DOMAINS + || oprofile_set_active(active_domains, adomains)) { + adomains = 0; + retval = -EINVAL; + } + + mutex_unlock(&adom_mutex); + return retval; +} + +static ssize_t adomain_read(struct file * file, char __user * buf, + size_t count, loff_t * offset) +{ + char * tmpbuf; + size_t len; + int i; + ssize_t retval; + + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) + return -ENOMEM; + + mutex_lock(&adom_mutex); + + len = 0; + for (i = 0; i < adomains; i++) + len += snprintf(tmpbuf + len, + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, + "%u ", active_domains[i]); + WARN_ON(len > TMPBUFSIZE); + if (len != 0 && len <= TMPBUFSIZE) + tmpbuf[len-1] = '\n'; + + mutex_unlock(&adom_mutex); + + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); + + kfree(tmpbuf); + return retval; +} + + +static const struct file_operations active_domain_ops = { + .read = adomain_read, + .write = adomain_write, +}; + +static unsigned int pdomains = 0; +static int passive_domains[MAX_OPROF_DOMAINS]; +static DEFINE_MUTEX(pdom_mutex); + +static ssize_t pdomain_write(struct file * file, char const __user * buf, + size_t count, loff_t * offset) +{ + char *tmpbuf; + char *startp, *endp; + int i; + unsigned long val; + ssize_t retval = count; + + if (*offset) + return -EINVAL; + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) + return -ENOMEM; + + if (copy_from_user(tmpbuf, buf, count)) { + kfree(tmpbuf); + return -EFAULT; + } + tmpbuf[count] = 0; + + mutex_lock(&pdom_mutex); + + startp = tmpbuf; + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { + val = simple_strtoul(startp, &endp, 0); + if (endp == startp) + break; + while (ispunct(*endp) || isspace(*endp)) + endp++; + passive_domains[i] = val; + if (passive_domains[i] != val) + /* Overflow, force error below */ + i = MAX_OPROF_DOMAINS + 1; + startp = endp; + } + /* Force error on trailing junk */ + pdomains = *startp ? 
MAX_OPROF_DOMAINS + 1 : i; + + kfree(tmpbuf); + + if (pdomains > MAX_OPROF_DOMAINS + || oprofile_set_passive(passive_domains, pdomains)) { + pdomains = 0; + retval = -EINVAL; + } + + mutex_unlock(&pdom_mutex); + return retval; +} + +static ssize_t pdomain_read(struct file * file, char __user * buf, + size_t count, loff_t * offset) +{ + char * tmpbuf; + size_t len; + int i; + ssize_t retval; + + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) + return -ENOMEM; + + mutex_lock(&pdom_mutex); + + len = 0; + for (i = 0; i < pdomains; i++) + len += snprintf(tmpbuf + len, + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0, + "%u ", passive_domains[i]); + WARN_ON(len > TMPBUFSIZE); + if (len != 0 && len <= TMPBUFSIZE) + tmpbuf[len-1] = '\n'; + + mutex_unlock(&pdom_mutex); + + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); + + kfree(tmpbuf); + return retval; +} + +static const struct file_operations passive_domain_ops = { + .read = pdomain_read, + .write = pdomain_write, +}; + +#endif /* CONFIG_XEN */ + void oprofile_create_files(struct super_block *sb, struct dentry *root) { /* reinitialize default values */ @@ -132,6 +332,10 @@ oprofilefs_create_file(sb, root, "enable", &enable_fops); oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); +#ifdef CONFIG_XEN + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); +#endif oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size); oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed); --- linux-ec2-2.6.31.orig/drivers/oprofile/event_buffer.h +++ linux-ec2-2.6.31/drivers/oprofile/event_buffer.h @@ -30,6 +30,9 @@ #define INVALID_COOKIE ~0UL #define NO_COOKIE 0UL +/* Constant used to refer to coordinator domain (Xen) */ +#define COORDINATOR_DOMAIN -1 + extern const struct file_operations event_buffer_fops; /* mutex between sync_cpu_buffers() and the --- linux-ec2-2.6.31.orig/drivers/oprofile/cpu_buffer.h +++ linux-ec2-2.6.31/drivers/oprofile/cpu_buffer.h @@ -40,7 +40,7 @@ struct oprofile_cpu_buffer { unsigned long buffer_size; struct task_struct *last_task; - int last_is_kernel; + int last_cpu_mode; int tracing; unsigned long sample_received; unsigned long sample_lost_overflow; @@ -62,7 +62,7 @@ { struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); - cpu_buf->last_is_kernel = -1; + cpu_buf->last_cpu_mode = -1; cpu_buf->last_task = NULL; } @@ -112,9 +112,13 @@ } /* extra data flags */ -#define KERNEL_CTX_SWITCH (1UL << 0) -#define IS_KERNEL (1UL << 1) +#define CPU_MODE_USER 0 +#define CPU_MODE_KERNEL 1 +#define CPU_MODE_XEN 2 +#define CPU_MODE_MASK 3 #define TRACE_BEGIN (1UL << 2) #define USER_CTX_SWITCH (1UL << 3) +#define KERNEL_CTX_SWITCH (1UL << 4) +#define DOMAIN_SWITCH (1UL << 5) #endif /* OPROFILE_CPU_BUFFER_H */ --- linux-ec2-2.6.31.orig/drivers/base/base.h +++ linux-ec2-2.6.31/drivers/base/base.h @@ -104,7 +104,7 @@ extern int cpu_dev_init(void); extern int bus_add_device(struct device *dev); -extern void bus_attach_device(struct device *dev); +extern void bus_probe_device(struct device *dev); extern void bus_remove_device(struct device *dev); extern int bus_add_driver(struct device_driver *drv); --- linux-ec2-2.6.31.orig/drivers/base/core.c +++ linux-ec2-2.6.31/drivers/base/core.c @@ -945,7 +945,7 @@ BUS_NOTIFY_ADD_DEVICE, dev); kobject_uevent(&dev->kobj, KOBJ_ADD); - bus_attach_device(dev); + 
bus_probe_device(dev); if (parent) klist_add_tail(&dev->p->knode_parent, &parent->p->klist_children); --- linux-ec2-2.6.31.orig/drivers/base/bus.c +++ linux-ec2-2.6.31/drivers/base/bus.c @@ -459,8 +459,9 @@ * bus_add_device - add device to bus * @dev: device being added * + * - Add device's bus attributes. + * - Create links to device's bus. * - Add the device to its bus's list of devices. - * - Create link to device's bus. */ int bus_add_device(struct device *dev) { @@ -483,6 +484,7 @@ error = make_deprecated_bus_links(dev); if (error) goto out_deprecated; + klist_add_tail(&dev->p->knode_bus, &bus->p->klist_devices); } return 0; @@ -498,24 +500,19 @@ } /** - * bus_attach_device - add device to bus - * @dev: device tried to attach to a driver + * bus_probe_device - probe drivers for a new device + * @dev: device to probe * - * - Add device to bus's list of devices. - * - Try to attach to driver. + * - Automatically probe for a driver if the bus allows it. */ -void bus_attach_device(struct device *dev) +void bus_probe_device(struct device *dev) { struct bus_type *bus = dev->bus; - int ret = 0; + int ret; - if (bus) { - if (bus->p->drivers_autoprobe) - ret = device_attach(dev); + if (bus && bus->p->drivers_autoprobe) { + ret = device_attach(dev); WARN_ON(ret < 0); - if (ret >= 0) - klist_add_tail(&dev->p->knode_bus, - &bus->p->klist_devices); } } --- linux-ec2-2.6.31.orig/drivers/base/cpu.c +++ linux-ec2-2.6.31/drivers/base/cpu.c @@ -78,7 +78,7 @@ } #endif /* CONFIG_HOTPLUG_CPU */ -#ifdef CONFIG_KEXEC +#if defined(CONFIG_KEXEC) && !defined(CONFIG_XEN) #include static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute *attr, @@ -217,7 +217,7 @@ if (!error) register_cpu_under_node(num, cpu_to_node(num)); -#ifdef CONFIG_KEXEC +#if defined(CONFIG_KEXEC) && !defined(CONFIG_XEN) if (!error) error = sysdev_create_file(&cpu->sysdev, &attr_crash_notes); #endif --- linux-ec2-2.6.31.orig/drivers/base/driver.c +++ linux-ec2-2.6.31/drivers/base/driver.c @@ -236,7 +236,7 @@ put_driver(other); printk(KERN_ERR "Error: Driver '%s' is already registered, " "aborting...\n", drv->name); - return -EEXIST; + return -EBUSY; } ret = bus_add_driver(drv); --- linux-ec2-2.6.31.orig/drivers/md/dm-log-userspace-base.c +++ linux-ec2-2.6.31/drivers/md/dm-log-userspace-base.c @@ -156,7 +156,7 @@ } /* The ptr value is sufficient for local unique id */ - lc->luid = (uint64_t)lc; + lc->luid = (unsigned long)lc; lc->ti = ti; @@ -582,7 +582,7 @@ break; case STATUSTYPE_TABLE: sz = 0; - table_args = strstr(lc->usr_argv_str, " "); + table_args = strchr(lc->usr_argv_str, ' '); BUG_ON(!table_args); /* There will always be a ' ' */ table_args++; --- linux-ec2-2.6.31.orig/drivers/md/raid5.c +++ linux-ec2-2.6.31/drivers/md/raid5.c @@ -3790,6 +3790,8 @@ sector_nr = conf->reshape_progress; sector_div(sector_nr, new_data_disks); if (sector_nr) { + mddev->curr_resync_completed = sector_nr; + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); *skipped = 1; return sector_nr; } --- linux-ec2-2.6.31.orig/drivers/md/dm-log-userspace-transfer.c +++ linux-ec2-2.6.31/drivers/md/dm-log-userspace-transfer.c @@ -129,11 +129,13 @@ * This is the connector callback that delivers data * that was sent from userspace. 
*/ -static void cn_ulog_callback(void *data) +static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) { - struct cn_msg *msg = (struct cn_msg *)data; struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1); + if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) + return; + spin_lock(&receiving_list_lock); if (msg->len == 0) fill_pkg(msg, NULL); --- linux-ec2-2.6.31.orig/drivers/md/dm-exception-store.h +++ linux-ec2-2.6.31/drivers/md/dm-exception-store.h @@ -101,9 +101,9 @@ struct dm_dev *cow; /* Size of data blocks saved - must be a power of 2 */ - chunk_t chunk_size; - chunk_t chunk_mask; - chunk_t chunk_shift; + unsigned chunk_size; + unsigned chunk_mask; + unsigned chunk_shift; void *context; }; @@ -169,7 +169,7 @@ int dm_exception_store_type_unregister(struct dm_exception_store_type *type); int dm_exception_store_set_chunk_size(struct dm_exception_store *store, - unsigned long chunk_size_ulong, + unsigned chunk_size, char **error); int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, --- linux-ec2-2.6.31.orig/drivers/md/dm-snap.c +++ linux-ec2-2.6.31/drivers/md/dm-snap.c @@ -296,6 +296,7 @@ */ static int register_snapshot(struct dm_snapshot *snap) { + struct dm_snapshot *l; struct origin *o, *new_o; struct block_device *bdev = snap->origin->bdev; @@ -319,7 +320,11 @@ __insert_origin(o); } - list_add_tail(&snap->list, &o->snapshots); + /* Sort the list according to chunk size, largest-first smallest-last */ + list_for_each_entry(l, &o->snapshots, list) + if (l->store->chunk_size < snap->store->chunk_size) + break; + list_add_tail(&snap->list, &l->list); up_write(&_origins_lock); return 0; @@ -668,6 +673,11 @@ bio_list_init(&s->queued_bios); INIT_WORK(&s->queued_bios_work, flush_queued_bios); + if (!s->store->chunk_size) { + ti->error = "Chunk size not set"; + goto bad_load_and_register; + } + /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ if (register_snapshot(s)) { @@ -951,7 +961,7 @@ src.bdev = bdev; src.sector = chunk_to_sector(s->store, pe->e.old_chunk); - src.count = min(s->store->chunk_size, dev_size - src.sector); + src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); dest.bdev = s->store->cow->bdev; dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); @@ -1142,6 +1152,8 @@ unsigned sz = 0; struct dm_snapshot *snap = ti->private; + down_write(&snap->lock); + switch (type) { case STATUSTYPE_INFO: if (!snap->valid) @@ -1173,6 +1185,8 @@ break; } + up_write(&snap->lock); + return 0; } @@ -1388,7 +1402,7 @@ struct dm_dev *dev = ti->private; struct dm_snapshot *snap; struct origin *o; - chunk_t chunk_size = 0; + unsigned chunk_size = 0; down_read(&_origins_lock); o = __lookup_origin(dev->bdev); @@ -1465,7 +1479,7 @@ r = dm_register_target(&snapshot_target); if (r) { DMERR("snapshot target register failed %d", r); - return r; + goto bad_register_snapshot_target; } r = dm_register_target(&origin_target); @@ -1522,6 +1536,9 @@ dm_unregister_target(&origin_target); bad1: dm_unregister_target(&snapshot_target); + +bad_register_snapshot_target: + dm_exception_store_exit(); return r; } --- linux-ec2-2.6.31.orig/drivers/md/raid1.c +++ linux-ec2-2.6.31/drivers/md/raid1.c @@ -1643,11 +1643,12 @@ r1_bio->sector, r1_bio->sectors); unfreeze_array(conf); - } + } else + md_error(mddev, + conf->mirrors[r1_bio->read_disk].rdev); bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1 || - disk == r1_bio->read_disk) { + if 
((disk=read_balance(conf, r1_bio)) == -1) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" " read error for block %llu\n", bdevname(bio->bi_bdev,b), @@ -1676,6 +1677,7 @@ generic_make_request(bio); } } + cond_resched(); } if (unplug) unplug_slaves(mddev); --- linux-ec2-2.6.31.orig/drivers/md/bitmap.h +++ linux-ec2-2.6.31/drivers/md/bitmap.h @@ -282,7 +282,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); void bitmap_unplug(struct bitmap *bitmap); -void bitmap_daemon_work(struct bitmap *bitmap); +void bitmap_daemon_work(mddev_t *mddev); #endif #endif --- linux-ec2-2.6.31.orig/drivers/md/dm.c +++ linux-ec2-2.6.31/drivers/md/dm.c @@ -47,6 +47,7 @@ atomic_t io_count; struct bio *bio; unsigned long start_time; + spinlock_t endio_lock; }; /* @@ -576,8 +577,12 @@ struct mapped_device *md = io->md; /* Push-back supersedes any I/O errors */ - if (error && !(io->error > 0 && __noflush_suspending(md))) - io->error = error; + if (unlikely(error)) { + spin_lock_irqsave(&io->endio_lock, flags); + if (!(io->error > 0 && __noflush_suspending(md))) + io->error = error; + spin_unlock_irqrestore(&io->endio_lock, flags); + } if (atomic_dec_and_test(&io->io_count)) { if (io->error == DM_ENDIO_REQUEUE) { @@ -1224,6 +1229,7 @@ atomic_set(&ci.io->io_count, 1); ci.io->bio = bio; ci.io->md = md; + spin_lock_init(&ci.io->endio_lock); ci.sector = bio->bi_sector; ci.sector_count = bio_sectors(bio); if (unlikely(bio_empty_barrier(bio))) @@ -1819,6 +1825,7 @@ bad_bdev: destroy_workqueue(md->wq); bad_thread: + del_gendisk(md->disk); put_disk(md->disk); bad_disk: blk_cleanup_queue(md->queue); @@ -2562,6 +2569,7 @@ { return md->disk; } +EXPORT_SYMBOL_GPL(dm_disk); struct kobject *dm_kobject(struct mapped_device *md) { --- linux-ec2-2.6.31.orig/drivers/md/dm-exception-store.c +++ linux-ec2-2.6.31/drivers/md/dm-exception-store.c @@ -155,7 +155,8 @@ char *value; chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || *value != '\0') { + if (*chunk_size_arg == '\0' || *value != '\0' || + chunk_size_ulong > UINT_MAX) { *error = "Invalid chunk size"; return -EINVAL; } @@ -171,34 +172,35 @@ */ chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); - return dm_exception_store_set_chunk_size(store, chunk_size_ulong, + return dm_exception_store_set_chunk_size(store, + (unsigned) chunk_size_ulong, error); } int dm_exception_store_set_chunk_size(struct dm_exception_store *store, - unsigned long chunk_size_ulong, + unsigned chunk_size, char **error) { /* Check chunk_size is a power of 2 */ - if (!is_power_of_2(chunk_size_ulong)) { + if (!is_power_of_2(chunk_size)) { *error = "Chunk size is not a power of 2"; return -EINVAL; } /* Validate the chunk size against the device block size */ - if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) { + if (chunk_size % (bdev_logical_block_size(store->cow->bdev) >> 9)) { *error = "Chunk size is not a multiple of device blocksize"; return -EINVAL; } - if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) { + if (chunk_size > INT_MAX >> SECTOR_SHIFT) { *error = "Chunk size is too high"; return -EINVAL; } - store->chunk_size = chunk_size_ulong; - store->chunk_mask = chunk_size_ulong - 1; - store->chunk_shift = ffs(chunk_size_ulong) - 1; + store->chunk_size = chunk_size; + store->chunk_mask = chunk_size - 1; + store->chunk_shift = ffs(chunk_size) - 1; return 0; } @@ -251,7 +253,7 @@ r = set_chunk_size(tmp_store, argv[2], &ti->error); if (r) - goto bad_cow; + goto bad_ctr; r = type->ctr(tmp_store, 0, NULL); if (r) { 
--- linux-ec2-2.6.31.orig/drivers/md/raid10.c +++ linux-ec2-2.6.31/drivers/md/raid10.c @@ -1630,6 +1630,7 @@ generic_make_request(bio); } } + cond_resched(); } if (unplug) unplug_slaves(mddev); --- linux-ec2-2.6.31.orig/drivers/md/bitmap.c +++ linux-ec2-2.6.31/drivers/md/bitmap.c @@ -1077,23 +1077,31 @@ * out to disk */ -void bitmap_daemon_work(struct bitmap *bitmap) +void bitmap_daemon_work(mddev_t *mddev) { + struct bitmap *bitmap; unsigned long j; unsigned long flags; struct page *page = NULL, *lastpage = NULL; int blocks; void *paddr; - if (bitmap == NULL) + /* Use a mutex to guard daemon_work against + * bitmap_destroy. + */ + mutex_lock(&mddev->bitmap_mutex); + bitmap = mddev->bitmap; + if (bitmap == NULL) { + mutex_unlock(&mddev->bitmap_mutex); return; + } if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) goto done; bitmap->daemon_lastrun = jiffies; if (bitmap->allclean) { bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - return; + goto done; } bitmap->allclean = 1; @@ -1202,6 +1210,7 @@ done: if (bitmap->allclean == 0) bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; + mutex_unlock(&mddev->bitmap_mutex); } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1538,9 +1547,9 @@ */ sleep = bitmap->daemon_sleep; bitmap->daemon_sleep = 0; - bitmap_daemon_work(bitmap); - bitmap_daemon_work(bitmap); - bitmap_daemon_work(bitmap); + bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); bitmap->daemon_sleep = sleep; bitmap_update_sb(bitmap); } @@ -1571,6 +1580,7 @@ kfree(bp); kfree(bitmap); } + void bitmap_destroy(mddev_t *mddev) { struct bitmap *bitmap = mddev->bitmap; @@ -1578,7 +1588,9 @@ if (!bitmap) /* there was no bitmap */ return; + mutex_lock(&mddev->bitmap_mutex); mddev->bitmap = NULL; /* disconnect from the md device */ + mutex_unlock(&mddev->bitmap_mutex); if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; --- linux-ec2-2.6.31.orig/drivers/md/md.c +++ linux-ec2-2.6.31/drivers/md/md.c @@ -276,7 +276,9 @@ if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; if (!mddev->raid_disks && list_empty(&mddev->disks) && - !mddev->hold_active) { + mddev->ctime == 0 && !mddev->hold_active) { + /* Array is not configured at all, and not held active, + * so destroy it */ list_del(&mddev->all_mddevs); if (mddev->gendisk) { /* we did a probe so need to clean up. @@ -361,6 +363,7 @@ mutex_init(&new->open_mutex); mutex_init(&new->reconfig_mutex); + mutex_init(&new->bitmap_mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); @@ -5039,6 +5042,10 @@ mddev->minor_version = info->minor_version; mddev->patch_version = info->patch_version; mddev->persistent = !info->not_persistent; + /* ensure mddev_put doesn't delete this now that there + * is some minimal configuration. + */ + mddev->ctime = get_seconds(); return 0; } mddev->major_version = MD_MAJOR_VERSION; @@ -6495,8 +6502,9 @@ skip: mddev->curr_resync = 0; mddev->curr_resync_completed = 0; - mddev->resync_min = 0; - mddev->resync_max = MaxSector; + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) + /* We completed so max setting can be forgotten. 
*/ + mddev->resync_max = MaxSector; sysfs_notify(&mddev->kobj, NULL, "sync_completed"); wake_up(&resync_wait); set_bit(MD_RECOVERY_DONE, &mddev->recovery); @@ -6594,7 +6602,7 @@ if (mddev->bitmap) - bitmap_daemon_work(mddev->bitmap); + bitmap_daemon_work(mddev); if (mddev->ro) return; --- linux-ec2-2.6.31.orig/drivers/md/dm-snap-persistent.c +++ linux-ec2-2.6.31/drivers/md/dm-snap-persistent.c @@ -284,12 +284,13 @@ { int r; struct disk_header *dh; - chunk_t chunk_size; + unsigned chunk_size; int chunk_size_supplied = 1; char *chunk_err; /* - * Use default chunk size (or hardsect_size, if larger) if none supplied + * Use default chunk size (or logical_block_size, if larger) + * if none supplied */ if (!ps->store->chunk_size) { ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, @@ -334,10 +335,9 @@ return 0; if (chunk_size_supplied) - DMWARN("chunk size %llu in device metadata overrides " - "table chunk size of %llu.", - (unsigned long long)chunk_size, - (unsigned long long)ps->store->chunk_size); + DMWARN("chunk size %u in device metadata overrides " + "table chunk size of %u.", + chunk_size, ps->store->chunk_size); /* We had a bogus chunk_size. Fix stuff up. */ free_area(ps); @@ -345,8 +345,8 @@ r = dm_exception_store_set_chunk_size(ps->store, chunk_size, &chunk_err); if (r) { - DMERR("invalid on-disk chunk size %llu: %s.", - (unsigned long long)chunk_size, chunk_err); + DMERR("invalid on-disk chunk size %u: %s.", + chunk_size, chunk_err); return r; } --- linux-ec2-2.6.31.orig/drivers/md/md.h +++ linux-ec2-2.6.31/drivers/md/md.h @@ -289,6 +289,7 @@ * hot-adding a bitmap. It should * eventually be settable by sysfs. */ + struct mutex bitmap_mutex; struct list_head all_mddevs; }; --- linux-ec2-2.6.31.orig/drivers/cpuidle/Kconfig +++ linux-ec2-2.6.31/drivers/cpuidle/Kconfig @@ -1,6 +1,7 @@ config CPU_IDLE bool "CPU idle PM support" + depends on !PROCESSOR_EXTERNAL_CONTROL default ACPI help CPU idle is a generic framework for supporting software-controlled --- linux-ec2-2.6.31.orig/drivers/cpuidle/cpuidle.c +++ linux-ec2-2.6.31/drivers/cpuidle/cpuidle.c @@ -75,8 +75,11 @@ #endif /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(dev); - if (need_resched()) + if (need_resched()) { + local_irq_enable(); return; + } + target_state = &dev->states[next_state]; /* enter the state and update stats */ --- linux-ec2-2.6.31.orig/drivers/virtio/virtio_ring.c +++ linux-ec2-2.6.31/drivers/virtio/virtio_ring.c @@ -281,6 +281,9 @@ return NULL; } + /* Only get used array entries after they have been exposed by host. */ + rmb(); + i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; --- linux-ec2-2.6.31.orig/drivers/mmc/host/pxamci.c +++ linux-ec2-2.6.31/drivers/mmc/host/pxamci.c @@ -694,14 +694,14 @@ if (mmc) { struct pxamci_host *host = mmc_priv(mmc); + mmc_remove_host(mmc); + if (host->vcc) regulator_put(host->vcc); if (host->pdata && host->pdata->exit) host->pdata->exit(&pdev->dev, mmc); - mmc_remove_host(mmc); - pxamci_stop_clock(host); writel(TXFIFO_WR_REQ|RXFIFO_RD_REQ|CLK_IS_OFF|STOP_CMD| END_CMD_RES|PRG_DONE|DATA_TRAN_DONE, --- linux-ec2-2.6.31.orig/drivers/mmc/card/block.c +++ linux-ec2-2.6.31/drivers/mmc/card/block.c @@ -88,6 +88,12 @@ int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT; __clear_bit(devidx, dev_use); + /* + * We are about to drop the last reference to the disk object. 
+ * Nothing else should now be looking at the queue pointer, so + * now it won't hurt if we release it. + */ + blk_cleanup_queue(md->disk->queue); put_disk(md->disk); kfree(md); } --- linux-ec2-2.6.31.orig/drivers/mmc/card/queue.c +++ linux-ec2-2.6.31/drivers/mmc/card/queue.c @@ -245,7 +245,12 @@ kfree(mq->bounce_buf); mq->bounce_buf = NULL; - blk_cleanup_queue(mq->queue); + /* + * Calling blk_cleanup_queue() would be too soon here. As long as + * the gendisk has a reference to it and is not released we should + * keep the queue. It has been shutdown and will not accept any new + * requests, so that should be safe. + */ mq->card = NULL; } --- linux-ec2-2.6.31.orig/drivers/mmc/core/mmc.c +++ linux-ec2-2.6.31/drivers/mmc/core/mmc.c @@ -180,11 +180,11 @@ err = mmc_send_ext_csd(card, ext_csd); if (err) { - /* - * We all hosts that cannot perform the command - * to fail more gracefully - */ - if (err != -EINVAL) + /* If the host or the card can't do the switch, + * fail more gracefully. */ + if ((err != -EINVAL) + && (err != -ENOSYS) + && (err != -EFAULT)) goto out; /* --- linux-ec2-2.6.31.orig/drivers/mmc/core/sd.c +++ linux-ec2-2.6.31/drivers/mmc/core/sd.c @@ -210,11 +210,11 @@ err = mmc_sd_switch(card, 0, 0, 1, status); if (err) { - /* - * We all hosts that cannot perform the command - * to fail more gracefully - */ - if (err != -EINVAL) + /* If the host or the card can't do the switch, + * fail more gracefully. */ + if ((err != -EINVAL) + && (err != -ENOSYS) + && (err != -EFAULT)) goto out; printk(KERN_WARNING "%s: problem reading switch " --- linux-ec2-2.6.31.orig/drivers/video/vesafb.c +++ linux-ec2-2.6.31/drivers/video/vesafb.c @@ -28,6 +28,12 @@ #define dac_reg (0x3c8) #define dac_val (0x3c9) +struct vesafb_info +{ + u32 pseudo_palette[256]; + int mtrr_hdl; +}; + /* --------------------------------------------------------------------- */ static struct fb_var_screeninfo vesafb_defined __initdata = { @@ -47,16 +53,37 @@ .accel = FB_ACCEL_NONE, }; +#ifndef MODULE static int inverse __read_mostly; +#endif static int mtrr __read_mostly; /* disable mtrr */ static int vram_remap __initdata; /* Set amount of memory to be used */ static int vram_total __initdata; /* Set total amount of memory */ static int pmi_setpal __read_mostly = 1; /* pmi for palette changes ??? 
*/ +static int redraw __read_mostly; static int ypan __read_mostly; /* 0..nothing, 1..ypan, 2..ywrap */ +static int ywrap __read_mostly; static void (*pmi_start)(void) __read_mostly; static void (*pmi_pal) (void) __read_mostly; static int depth __read_mostly; static int vga_compat __read_mostly; + +module_param(redraw, bool, 0); +module_param(ypan, bool, 0); +module_param(ywrap, bool, 0); +module_param_named(vgapal, pmi_setpal, invbool, 0); +MODULE_PARM_DESC(vgapal, "Use VGA for setting palette (default)"); +module_param_named(pmipal, pmi_setpal, bool, 0); +MODULE_PARM_DESC(pmipal, "Use PMI for setting palette"); +module_param(mtrr, bool, 0); +MODULE_PARM_DESC(mtrr, "Enable MTRR support (default)"); +module_param_named(nomtrr, mtrr, invbool, 0); +MODULE_PARM_DESC(nomtrr, "Disable MTRR support"); +module_param(vram_remap, int, 0); +MODULE_PARM_DESC(vram_remap, "Set total amount of memory to be used"); +module_param(vram_total, int, 0); +MODULE_PARM_DESC(vram_total, "Total amount of memory"); + /* --------------------------------------------------------------------- */ static int vesafb_pan_display(struct fb_var_screeninfo *var, @@ -192,6 +219,7 @@ .fb_imageblit = cfb_imageblit, }; +#ifndef MODULE static int __init vesafb_setup(char *options) { char *this_opt; @@ -225,6 +253,7 @@ } return 0; } +#endif static int __init vesafb_probe(struct platform_device *dev) { @@ -476,8 +505,28 @@ return err; } +static int __exit vesafb_remove(struct platform_device *device) +{ + struct fb_info *info = dev_get_drvdata(&device->dev); + + unregister_framebuffer(info); +#ifdef CONFIG_MTRR + { + struct vesafb_info *vfb_info = (struct vesafb_info *) info->par; + if (vfb_info->mtrr_hdl >= 0) + mtrr_del(vfb_info->mtrr_hdl, 0, 0); + } +#endif + iounmap(info->screen_base); + framebuffer_release(info); + release_mem_region(vesafb_fix.smem_start, vesafb_fix.smem_len); + + return 0; +} + static struct platform_driver vesafb_driver = { .probe = vesafb_probe, + .remove = vesafb_remove, .driver = { .name = "vesafb", }, @@ -488,11 +537,18 @@ static int __init vesafb_init(void) { int ret; +#ifndef MODULE char *option = NULL; /* ignore error return of fb_get_options */ fb_get_options("vesafb", &option); vesafb_setup(option); +#else + if (redraw) + ypan = 0; + if (ywrap) + ypan = 2; +#endif ret = platform_driver_register(&vesafb_driver); if (!ret) { @@ -511,6 +567,14 @@ return ret; } + +static void __exit vesafb_exit(void) +{ + platform_device_unregister(vesafb_device); + platform_driver_unregister(&vesafb_driver); +} + module_init(vesafb_init); +module_exit(vesafb_exit); MODULE_LICENSE("GPL"); --- linux-ec2-2.6.31.orig/drivers/video/xen-fbfront.c +++ linux-ec2-2.6.31/drivers/video/xen-fbfront.c @@ -670,7 +670,6 @@ static struct xenbus_driver xenfb_driver = { .name = "vfb", - .owner = THIS_MODULE, .ids = xenfb_ids, .probe = xenfb_probe, .remove = xenfb_remove, --- linux-ec2-2.6.31.orig/drivers/video/Kconfig +++ linux-ec2-2.6.31/drivers/video/Kconfig @@ -686,8 +686,8 @@ If unsure, say N. 
config FB_VESA - bool "VESA VGA graphics support" - depends on (FB = y) && X86 + tristate "VESA VGA graphics support" + depends on FB && X86 select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT @@ -2064,7 +2064,7 @@ config XEN_FBDEV_FRONTEND tristate "Xen virtual frame buffer support" - depends on FB && XEN + depends on FB && PARAVIRT_XEN select FB_SYS_FILLRECT select FB_SYS_COPYAREA select FB_SYS_IMAGEBLIT --- linux-ec2-2.6.31.orig/drivers/video/uvesafb.c +++ linux-ec2-2.6.31/drivers/video/uvesafb.c @@ -67,12 +67,14 @@ * find the kernel part of the task struct, copy the registers and * the buffer contents and then complete the task. */ -static void uvesafb_cn_callback(void *data) +static void uvesafb_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) { - struct cn_msg *msg = data; struct uvesafb_task *utask; struct uvesafb_ktask *task; + if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) + return; + if (msg->seq >= UVESAFB_TASKS_MAX) return; --- linux-ec2-2.6.31.orig/drivers/video/s3c-fb.c +++ linux-ec2-2.6.31/drivers/video/s3c-fb.c @@ -1036,7 +1036,7 @@ static struct platform_driver s3c_fb_driver = { .probe = s3c_fb_probe, - .remove = s3c_fb_remove, + .remove = __devexit_p(s3c_fb_remove), .suspend = s3c_fb_suspend, .resume = s3c_fb_resume, .driver = { --- linux-ec2-2.6.31.orig/drivers/video/sis/vstruct.h +++ linux-ec2-2.6.31/drivers/video/sis/vstruct.h @@ -342,7 +342,7 @@ unsigned short SiS_RY4COE; unsigned short SiS_LCDHDES; unsigned short SiS_LCDVDES; - unsigned short SiS_DDC_Port; + SISIOADDRESS SiS_DDC_Port; unsigned short SiS_DDC_Index; unsigned short SiS_DDC_Data; unsigned short SiS_DDC_NData; --- linux-ec2-2.6.31.orig/drivers/video/matrox/g450_pll.c +++ linux-ec2-2.6.31/drivers/video/matrox/g450_pll.c @@ -341,7 +341,8 @@ M1064_XDVICLKCTRL_C1DVICLKEN | M1064_XDVICLKCTRL_DVILOOPCTL | M1064_XDVICLKCTRL_P1LOOPBWDTCTL; - matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL,tmp); + /* Setting this breaks PC systems so don't do it */ + /* matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL,tmp); */ matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl); --- linux-ec2-2.6.31.orig/drivers/video/console/fbcon.c +++ linux-ec2-2.6.31/drivers/video/console/fbcon.c @@ -114,6 +114,7 @@ static int fbcon_is_default = 1; static int fbcon_has_exited; static int primary_device = -1; +static int fbcon_has_console_bind; #ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY static int map_override; @@ -544,6 +545,8 @@ con2fb_map[i] = -1; } info_idx = -1; + } else { + fbcon_has_console_bind = 1; } return err; @@ -2923,6 +2926,10 @@ ret = unbind_con_driver(&fb_con, first_fb_vc, last_fb_vc, fbcon_is_default); + + if (!ret) + fbcon_has_console_bind = 0; + return ret; } #else @@ -2936,6 +2943,9 @@ { int i, new_idx = -1, ret = 0; + if (!fbcon_has_console_bind) + return 0; + for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] != idx && con2fb_map[i] != -1) { --- linux-ec2-2.6.31.orig/drivers/video/backlight/lcd.c +++ linux-ec2-2.6.31/drivers/video/backlight/lcd.c @@ -56,7 +56,7 @@ static int lcd_register_fb(struct lcd_device *ld) { - memset(&ld->fb_notif, 0, sizeof(&ld->fb_notif)); + memset(&ld->fb_notif, 0, sizeof(ld->fb_notif)); ld->fb_notif.notifier_call = fb_notifier_callback; return fb_register_client(&ld->fb_notif); } --- linux-ec2-2.6.31.orig/drivers/rtc/rtc-v3020.c +++ linux-ec2-2.6.31/drivers/rtc/rtc-v3020.c @@ -96,7 +96,7 @@ static unsigned char v3020_mmio_read_bit(struct v3020 *chip) { - return readl(chip->ioaddress) & (1 << chip->leftshift); + return !!(readl(chip->ioaddress) & (1 << 
chip->leftshift)); } static struct v3020_chip_ops v3020_mmio_ops = { --- linux-ec2-2.6.31.orig/drivers/ssb/sprom.c +++ linux-ec2-2.6.31/drivers/ssb/sprom.c @@ -13,6 +13,8 @@ #include "ssb_private.h" +#include <linux/ctype.h> + static const struct ssb_sprom *fallback_sprom; @@ -33,17 +35,27 @@ static int hex2sprom(u16 *sprom, const char *dump, size_t len, size_t sprom_size_words) { - char tmp[5] = { 0 }; - int cnt = 0; + char c, tmp[5] = { 0 }; + int err, cnt = 0; unsigned long parsed; - if (len < sprom_size_words * 2) + /* Strip whitespace at the end. */ + while (len) { + c = dump[len - 1]; + if (!isspace(c) && c != '\0') + break; + len--; + } + /* Length must match exactly. */ + if (len != sprom_size_words * 4) return -EINVAL; while (cnt < sprom_size_words) { memcpy(tmp, dump, 4); dump += 4; - parsed = simple_strtoul(tmp, NULL, 16); + err = strict_strtoul(tmp, 16, &parsed); + if (err) + return err; sprom[cnt++] = swab16((u16)parsed); } --- linux-ec2-2.6.31.orig/drivers/acpi/video.c +++ linux-ec2-2.6.31/drivers/acpi/video.c @@ -603,6 +603,7 @@ unsigned long long *level) { acpi_status status = AE_OK; + int i; if (device->cap._BQC || device->cap._BCQ) { char *buf = device->cap._BQC ? "_BQC" : "_BCQ"; @@ -618,8 +619,15 @@ } *level += bqc_offset_aml_bug_workaround; - device->brightness->curr = *level; - return 0; + for (i = 2; i < device->brightness->count; i++) + if (device->brightness->levels[i] == *level) { + device->brightness->curr = *level; + return 0; + } + /* BQC returned an invalid level. Stop using it. */ + ACPI_WARNING((AE_INFO, "%s returned an invalid level", + buf)); + device->cap._BQC = device->cap._BCQ = 0; } else { /* Fixme: * should we return an error or ignore this failure? @@ -870,7 +878,7 @@ br->flags._BCM_use_index = br->flags._BCL_use_index; /* _BQC uses INDEX while _BCL uses VALUE in some laptops */ - br->curr = level_old = max_level; + br->curr = level = max_level; if (!device->cap._BQC) goto set_level; @@ -892,15 +900,25 @@ br->flags._BQC_use_index = (level == max_level ? 0 : 1); - if (!br->flags._BQC_use_index) + if (!br->flags._BQC_use_index) { + /* + * Set the backlight to the initial state. + * On some buggy laptops, _BQC returns an uninitialized value + * when invoked for the first time, i.e. level_old is invalid. + * Set the backlight to max_level in this case. + */ + for (i = 2; i < br->count; i++) + if (level_old == br->levels[i]) + level = level_old; goto set_level; + } if (br->flags._BCL_reversed) level_old = (br->count - 1) - level_old; - level_old = br->levels[level_old]; + level = br->levels[level_old]; set_level: - result = acpi_video_device_lcd_set_level(device, level_old); + result = acpi_video_device_lcd_set_level(device, level); if (result) goto out_free_levels; @@ -1083,7 +1101,12 @@ */ /* Does this device support video switching? */ - if (video->cap._DOS) { + if (video->cap._DOS || video->cap._DOD) { + if (!video->cap._DOS) { + printk(KERN_WARNING FW_BUG + "ACPI(%s) defines _DOD but not _DOS\n", + acpi_device_bid(video->device)); + } video->flags.multihead = 1; status = 0; } --- linux-ec2-2.6.31.orig/drivers/acpi/pci_root.c +++ linux-ec2-2.6.31/drivers/acpi/pci_root.c @@ -400,6 +400,17 @@ pbus = pdev->subordinate; pci_dev_put(pdev); + + /* + * This function may be called for a non-PCI device that has a + * PCI parent (e.g. a disk under a PCI SATA controller). In that + * case pdev->subordinate will be NULL for the parent.
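+ * An illustration (hypothetical ACPI path): walking up from a disk at + * \_SB.PCI0.SATA.DSK0 ends at the SATA controller's pci_dev, which is not + * a bridge, so its ->subordinate is NULL and the walk must stop here + * instead of dereferencing it.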
+ */ + if (!pbus) { + dev_dbg(&pdev->dev, "Not a PCI-to-PCI bridge\n"); + pdev = NULL; + break; + } } out: list_for_each_entry_safe(node, tmp, &device_list, node) @@ -465,6 +476,40 @@ } EXPORT_SYMBOL(acpi_pci_osc_control_set); +#ifdef CONFIG_PCI_GUESTDEV +#include <linux/device.h> + +static ssize_t seg_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct list_head *entry; + + list_for_each(entry, &acpi_pci_roots) { + struct acpi_pci_root *root; + root = list_entry(entry, struct acpi_pci_root, node); + if (&root->device->dev == dev) + return sprintf(buf, "%04x\n", root->segment); + } + return 0; +} +static DEVICE_ATTR(seg, 0444, seg_show, NULL); + +static ssize_t bbn_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct list_head *entry; + + list_for_each(entry, &acpi_pci_roots) { + struct acpi_pci_root *root; + root = list_entry(entry, struct acpi_pci_root, node); + if (&root->device->dev == dev) + return sprintf(buf, "%02x\n", root->bus_nr); + } + return 0; +} +static DEVICE_ATTR(bbn, 0444, bbn_show, NULL); +#endif + static int __devinit acpi_pci_root_add(struct acpi_device *device) { unsigned long long segment, bus; @@ -576,6 +621,13 @@ if (flags != base_flags) acpi_pci_osc_support(root, flags); +#ifdef CONFIG_PCI_GUESTDEV + if (device_create_file(&device->dev, &dev_attr_seg)) + dev_warn(&device->dev, "could not create seg attr\n"); + if (device_create_file(&device->dev, &dev_attr_bbn)) + dev_warn(&device->dev, "could not create bbn attr\n"); +#endif + return 0; end: @@ -613,3 +665,33 @@ } subsys_initcall(acpi_pci_root_init); + +#ifdef CONFIG_PCI_GUESTDEV +int acpi_pci_get_root_seg_bbn(char *hid, char *uid, int *seg, int *bbn) +{ + struct list_head *entry; + + list_for_each(entry, &acpi_pci_roots) { + struct acpi_pci_root *root; + root = list_entry(entry, struct acpi_pci_root, node); + if (!root->device->flags.hardware_id) + continue; + + if (strcmp(root->device->pnp.hardware_id, hid)) + continue; + + if (!root->device->flags.unique_id) { + if (strlen(uid)) + continue; + } else { + if (strcmp(root->device->pnp.unique_id, uid)) + continue; + } + + *seg = (int)root->segment; + *bbn = (int)root->bus_nr; + return TRUE; + } + return FALSE; +} +#endif --- linux-ec2-2.6.31.orig/drivers/acpi/processor_extcntl.c +++ linux-ec2-2.6.31/drivers/acpi/processor_extcntl.c @@ -0,0 +1,346 @@ +/* + * processor_extcntl.c - channel to external control logic + * + * Copyright (C) 2008, Intel corporation + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/acpi.h> +#include <linux/pm.h> +#include <linux/cpu.h> + +#include <acpi/processor.h> + +#define ACPI_PROCESSOR_CLASS "processor" +#define _COMPONENT ACPI_PROCESSOR_COMPONENT +ACPI_MODULE_NAME("processor_extcntl") + +static int processor_extcntl_parse_csd(struct acpi_processor *pr); +static int processor_extcntl_get_performance(struct acpi_processor *pr); +/* + * External processor control logic may register with its own set of + * ops to get ACPI related notifications. One example is a VMM. + */ +const struct processor_extcntl_ops *processor_extcntl_ops; +EXPORT_SYMBOL(processor_extcntl_ops); + +static int processor_notify_smm(void) +{ + acpi_status status; + static int is_done = 0; + + /* only need to successfully notify the BIOS once */ + /* avoid double notification, which may lead to unexpected results */ + if (is_done) + return 0; + + /* Can't write pstate_cnt to smi_cmd if either value is zero */ + if (!acpi_gbl_FADT.smi_command || !acpi_gbl_FADT.pstate_control) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n")); + return 0; + } + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n", + acpi_gbl_FADT.pstate_control, acpi_gbl_FADT.smi_command)); + + status = acpi_os_write_port(acpi_gbl_FADT.smi_command, + acpi_gbl_FADT.pstate_control, 8); + if (ACPI_FAILURE(status)) + return status; + + is_done = 1; + + return 0; +} + +int processor_notify_external(struct acpi_processor *pr, int event, int type) +{ + int ret = -EINVAL; + + if (!processor_cntl_external()) + return -EINVAL; + + switch (event) { + case PROCESSOR_PM_INIT: + case PROCESSOR_PM_CHANGE: + if ((type >= PM_TYPE_MAX) || + !processor_extcntl_ops->pm_ops[type]) + break; + + ret = processor_extcntl_ops->pm_ops[type](pr, event); + break; + case PROCESSOR_HOTPLUG: + if (processor_extcntl_ops->hotplug) + ret = processor_extcntl_ops->hotplug(pr, type); + break; + default: + printk(KERN_ERR "Unsupported processor event %d.\n", event); + break; + } + + return ret; +} + +/* + * External control logic can decide to grab full or partial control of the + * physical processor control bits. Take a VMM for example: physical + * processors are owned by the VMM, so existence information such as hotplug + * must always be forwarded to it. The same holds for processor idle state, + * which is also necessarily controlled by the VMM. For other control bits + * like performance/throttle states, the VMM may choose whether to take + * control based on its own policy. + */ +void processor_extcntl_init(void) +{ + if (!processor_extcntl_ops) + arch_acpi_processor_init_extcntl(&processor_extcntl_ops); +} + +/* + * This is called from ACPI processor init, and is meant to hold the + * housekeeping jobs needed to satisfy the external control model. For + * example, we put the dependency parse stubs here for idle and performance + * states. That information may not be available once it is split off from + * dom0 control logic such as the cpufreq driver. + */ +int processor_extcntl_prepare(struct acpi_processor *pr) +{ + /* parse cstate dependency information */ + if (processor_pm_external()) + processor_extcntl_parse_csd(pr); + + /* Initialize performance states */ + if (processor_pmperf_external()) + processor_extcntl_get_performance(pr); + + return 0; +} + +/* + * Currently no _CSD is implemented, which is why the existing ACPI code + * doesn't parse _CSD at all. But to keep the interface to the external + * control logic complete, we put a placeholder here for future + * compatibility.
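+ * (If _CSD were actually parsed, it would presumably fill the per-state + * domain_info/csd_count fields below with cross-CPU C-state coordination + * data, analogous to what _PSD provides for P-states further down.)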
+ */ +static int processor_extcntl_parse_csd(struct acpi_processor *pr) +{ + int i; + + for (i = 0; i < pr->power.count; i++) { + if (!pr->power.states[i].valid) + continue; + + /* No dependency by default */ + pr->power.states[i].domain_info = NULL; + pr->power.states[i].csd_count = 0; + } + + return 0; +} + +/* + * The existing ACPI module parses performance states when the acpi-cpufreq + * driver is loaded, which is something we'd like to disable to avoid + * conflicts with the external control logic. So we have to collect the raw + * performance information here when the ACPI processor object is found and + * started. + */ +static int processor_extcntl_get_performance(struct acpi_processor *pr) +{ + int ret; + struct acpi_processor_performance *perf; + struct acpi_psd_package *pdomain; + + if (pr->performance) + return -EBUSY; + + perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL); + if (!perf) + return -ENOMEM; + + pr->performance = perf; + /* Get basic performance state information */ + ret = acpi_processor_get_performance_info(pr); + if (ret < 0) + goto err_out; + + /* + * Here we need to retrieve performance dependency information from the + * _PSD object. The existing interface is not used because it sticks to + * Linux cpu ids to construct a bitmap, whereas we want to decouple ACPI + * processor objects from Linux cpu id logic. For example, even when + * Linux is configured as UP, we still want to expose all ACPI processor + * objects to the external logic. In this case, it's preferable to use + * the ACPI ID instead. + */ + pdomain = &pr->performance->domain_info; + pdomain->num_processors = 0; + ret = acpi_processor_get_psd(pr); + if (ret < 0) { + /* + * _PSD is optional - assume no coordination if absent (or + * broken), matching native kernels' behavior. + */ + pdomain->num_entries = ACPI_PSD_REV0_ENTRIES; + pdomain->revision = ACPI_PSD_REV0_REVISION; + pdomain->domain = pr->acpi_id; + pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL; + pdomain->num_processors = 1; + } + + /* Some sanity checks */ + if ((pdomain->revision != ACPI_PSD_REV0_REVISION) || + (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) || + ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) && + (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) && + (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) { + ret = -EINVAL; + goto err_out; + } + + /* Last step is to notify BIOS that external logic exists */ + processor_notify_smm(); + + processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF); + + return 0; +err_out: + pr->performance = NULL; + kfree(perf); + return ret; +} + +/* + * Objects and functions removed in native 2.6.29, and thus moved here. + */ +#ifdef CONFIG_SMP +static void smp_callback(void *v) +{ + /* we already woke the CPU up, nothing more to do */ +} + +/* + * This function gets called when a part of the kernel has a new latency + * requirement. This means we need to get all processors out of their C-state, + * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that + * wakes them all right up.
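+ * (Assumed flow: pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, ...) hooks up + * acpi_processor_latency_notify() below, each new latency requirement then + * triggers smp_call_function(), and every CPU re-evaluates its deepest + * allowable C-state on its next idle entry.)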
+ */ +static int acpi_processor_latency_notify(struct notifier_block *b, + unsigned long l, void *v) +{ + smp_call_function(smp_callback, NULL, 1); + return NOTIFY_OK; +} + +struct notifier_block acpi_processor_latency_notifier = { + .notifier_call = acpi_processor_latency_notify, +}; +#endif + +/* + * bm_history -- bit-mask with a bit per jiffy of bus-master activity + * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms + * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms + * 100 HZ: 0x0000000F: 4 jiffies = 40ms + * reduce history for more aggressive entry into C3 + */ +static unsigned int bm_history __read_mostly = + (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1)); +module_param(bm_history, uint, 0644); + +int acpi_processor_set_power_policy(struct acpi_processor *pr) +{ + unsigned int i; + unsigned int state_is_set = 0; + struct acpi_processor_cx *lower = NULL; + struct acpi_processor_cx *higher = NULL; + struct acpi_processor_cx *cx; + + + if (!pr) + return -EINVAL; + + /* + * This function sets the default Cx state policy (OS idle handler). + * Our scheme is to promote quickly to C2 but more conservatively + * to C3. We're favoring C2 for its characteristics of low latency + * (quick response), good power savings, and ability to allow bus + * mastering activity. Note that the Cx state policy is completely + * customizable and can be altered dynamically. + */ + + /* startup state */ + for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { + cx = &pr->power.states[i]; + if (!cx->valid) + continue; + + if (!state_is_set) + pr->power.state = cx; + state_is_set++; + break; + } + + if (!state_is_set) + return -ENODEV; + + /* demotion */ + for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { + cx = &pr->power.states[i]; + if (!cx->valid) + continue; + + if (lower) { + cx->demotion.state = lower; + cx->demotion.threshold.ticks = cx->latency_ticks; + cx->demotion.threshold.count = 1; + if (cx->type == ACPI_STATE_C3) + cx->demotion.threshold.bm = bm_history; + } + + lower = cx; + } + + /* promotion */ + for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) { + cx = &pr->power.states[i]; + if (!cx->valid) + continue; + + if (higher) { + cx->promotion.state = higher; + cx->promotion.threshold.ticks = cx->latency_ticks; + if (cx->type >= ACPI_STATE_C2) + cx->promotion.threshold.count = 4; + else + cx->promotion.threshold.count = 10; + if (higher->type == ACPI_STATE_C3) + cx->promotion.threshold.bm = bm_history; + } + + higher = cx; + } + + return 0; +} --- linux-ec2-2.6.31.orig/drivers/acpi/osl.c +++ linux-ec2-2.6.31/drivers/acpi/osl.c @@ -1182,7 +1182,13 @@ res_list_elem->name, (long long) res_list_elem->start, (long long) res_list_elem->end); - printk(KERN_INFO "ACPI: Device needs an ACPI driver\n"); + if (acpi_enforce_resources == ENFORCE_RESOURCES_LAX) + printk(KERN_NOTICE "ACPI: This conflict may" + " cause random problems and system" + " instability\n"); + printk(KERN_INFO "ACPI: If an ACPI driver is available" + " for this device, you should use it instead of" + " the native driver\n"); } if (acpi_enforce_resources == ENFORCE_RESOURCES_STRICT) return -EBUSY; --- linux-ec2-2.6.31.orig/drivers/acpi/scan.c +++ linux-ec2-2.6.31/drivers/acpi/scan.c @@ -187,6 +187,16 @@ } static DEVICE_ATTR(hid, 0444, acpi_device_hid_show, NULL); +#ifdef CONFIG_PCI_GUESTDEV +static ssize_t +acpi_device_uid_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + + return sprintf(buf, "%s\n", acpi_dev->pnp.unique_id); +} +static DEVICE_ATTR(uid, 0444, acpi_device_uid_show, 
NULL); +#endif + static ssize_t acpi_device_path_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); @@ -231,6 +241,13 @@ goto end; } +#ifdef CONFIG_PCI_GUESTDEV + if(dev->flags.unique_id) { + result = device_create_file(&dev->dev, &dev_attr_uid); + if(result) + goto end; + } +#endif /* * If device has _EJ0, 'eject' file is created that is used to trigger * hot-removal function from userland. @@ -1264,16 +1281,6 @@ acpi_device_set_id(device, parent, handle, type); /* - * The ACPI device is attached to acpi handle before getting - * the power/wakeup/peformance flags. Otherwise OS can't get - * the corresponding ACPI device by the acpi handle in the course - * of getting the power/wakeup/performance flags. - */ - result = acpi_device_set_context(device, type); - if (result) - goto end; - - /* * Power Management * ---------------- */ @@ -1303,6 +1310,8 @@ goto end; } + if ((result = acpi_device_set_context(device, type))) + goto end; result = acpi_device_register(device, parent); --- linux-ec2-2.6.31.orig/drivers/acpi/processor_perflib.c +++ linux-ec2-2.6.31/drivers/acpi/processor_perflib.c @@ -76,6 +76,7 @@ static int acpi_processor_ppc_status; +#ifdef CONFIG_CPU_FREQ static int acpi_processor_ppc_notifier(struct notifier_block *nb, unsigned long event, void *data) { @@ -118,6 +119,7 @@ static struct notifier_block acpi_ppc_notifier_block = { .notifier_call = acpi_processor_ppc_notifier, }; +#endif /* CONFIG_CPU_FREQ */ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) { @@ -162,9 +164,15 @@ if (ret < 0) return (ret); else +#ifdef CONFIG_CPU_FREQ return cpufreq_update_policy(pr->id); +#elif CONFIG_PROCESSOR_EXTERNAL_CONTROL + return processor_notify_external(pr, + PROCESSOR_PM_CHANGE, PM_TYPE_PERF); +#endif } +#ifdef CONFIG_CPU_FREQ void acpi_processor_ppc_init(void) { if (!cpufreq_register_notifier @@ -183,6 +191,7 @@ acpi_processor_ppc_status &= ~PPC_REGISTERED; } +#endif /* CONFIG_CPU_FREQ */ static int acpi_processor_get_performance_control(struct acpi_processor *pr) { @@ -330,7 +339,10 @@ return result; } -static int acpi_processor_get_performance_info(struct acpi_processor *pr) +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL +static +#endif +int acpi_processor_get_performance_info(struct acpi_processor *pr) { int result = 0; acpi_status status = AE_OK; @@ -371,6 +383,7 @@ return result; } +#ifdef CONFIG_CPU_FREQ int acpi_processor_notify_smm(struct module *calling_module) { acpi_status status; @@ -431,8 +444,12 @@ } EXPORT_SYMBOL(acpi_processor_notify_smm); +#endif /* CONFIG_CPU_FREQ */ -static int acpi_processor_get_psd(struct acpi_processor *pr) +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL +static +#endif +int acpi_processor_get_psd(struct acpi_processor *pr) { int result = 0; acpi_status status = AE_OK; --- linux-ec2-2.6.31.orig/drivers/acpi/video_detect.c +++ linux-ec2-2.6.31/drivers/acpi/video_detect.c @@ -82,7 +82,7 @@ return 0; /* Does this device able to support video switching ? 
*/ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOD", &h_dummy)) && + if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOD", &h_dummy)) || ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOS", &h_dummy))) video_caps |= ACPI_VIDEO_OUTPUT_SWITCHING; --- linux-ec2-2.6.31.orig/drivers/acpi/Kconfig +++ linux-ec2-2.6.31/drivers/acpi/Kconfig @@ -9,7 +9,7 @@ depends on PCI depends on PM select PNP - select CPU_IDLE + select CPU_IDLE if !PROCESSOR_EXTERNAL_CONTROL default y help Advanced Configuration and Power Interface (ACPI) support for @@ -280,6 +280,7 @@ config X86_PM_TIMER bool "Power Management Timer Support" if EMBEDDED depends on X86 + depends on !XEN default y help The Power Management Timer is available on all ACPI-capable, @@ -333,4 +334,13 @@ To compile this driver as a module, choose M here: the modules will be called sbs and sbshc. +config ACPI_PV_SLEEP + bool + depends on X86 && XEN && ACPI_SLEEP + default y + +config PROCESSOR_EXTERNAL_CONTROL + bool + depends on (X86 || IA64) && XEN + default y endif # ACPI --- linux-ec2-2.6.31.orig/drivers/acpi/pci_slot.c +++ linux-ec2-2.6.31/drivers/acpi/pci_slot.c @@ -57,7 +57,7 @@ MY_NAME , ## arg); \ } while (0) -#define SLOT_NAME_SIZE 20 /* Inspired by #define in acpiphp.h */ +#define SLOT_NAME_SIZE 21 /* Inspired by #define in acpiphp.h */ struct acpi_pci_slot { acpi_handle root_handle; /* handle of the root bridge */ @@ -149,7 +149,7 @@ return AE_OK; } - snprintf(name, sizeof(name), "%u", (u32)sun); + snprintf(name, sizeof(name), "%llu", sun); pci_slot = pci_create_slot(pci_bus, device, name, NULL); if (IS_ERR(pci_slot)) { err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot)); --- linux-ec2-2.6.31.orig/drivers/acpi/processor_core.c +++ linux-ec2-2.6.31/drivers/acpi/processor_core.c @@ -645,7 +645,8 @@ */ if (pr->id == -1) { if (ACPI_FAILURE - (acpi_processor_hotadd_init(pr->handle, &pr->id))) { + (acpi_processor_hotadd_init(pr->handle, &pr->id)) && + !processor_cntl_external()) { return -ENODEV; } } @@ -658,7 +659,12 @@ * generated as the following format: * CPU+CPU ID. */ - sprintf(acpi_device_bid(device), "CPU%X", pr->id); + if (pr->id != -1) + sprintf(acpi_device_bid(device), "CPU%X", pr->id); + else + snprintf(acpi_device_bid(device), + ARRAY_SIZE(acpi_device_bid(device)), + "CPUX%X", pr->acpi_id); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id, pr->acpi_id)); @@ -690,13 +696,17 @@ * of /proc/cpuinfo */ status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer); - if (ACPI_SUCCESS(status)) + if (ACPI_SUCCESS(status) && pr->id != -1) arch_fix_phys_package_id(pr->id, object.integer.value); return 0; } +#ifndef CONFIG_XEN static DEFINE_PER_CPU(void *, processor_device_array); +#else +static void *processor_device_array[NR_ACPI_CPUS]; +#endif static int __cpuinit acpi_processor_start(struct acpi_device *device) { @@ -704,53 +714,83 @@ struct acpi_processor *pr; struct sys_device *sysdev; + processor_extcntl_init(); + pr = acpi_driver_data(device); result = acpi_processor_get_info(device); - if (result) { + if (result || + ((pr->id == -1) && !processor_cntl_external())) { /* Processor is physically not present */ return 0; } - BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); + BUG_ON(!processor_cntl_external() && + ((pr->id >= nr_cpu_ids) || (pr->id < 0))); /* * Buggy BIOS check * ACPI id of processors can be reported wrongly by the BIOS. 
* Don't trust it blindly +#ifndef CONFIG_XEN if (per_cpu(processor_device_array, pr->id) != NULL && per_cpu(processor_device_array, pr->id) != device) { +#else + BUG_ON(pr->acpi_id >= NR_ACPI_CPUS); + if (processor_device_array[pr->acpi_id] != NULL && + processor_device_array[pr->acpi_id] != device) { +#endif printk(KERN_WARNING "BIOS reported wrong ACPI id " "for the processor\n"); return -ENODEV; } +#ifndef CONFIG_XEN per_cpu(processor_device_array, pr->id) = device; per_cpu(processors, pr->id) = pr; +#else + processor_device_array[pr->acpi_id] = device; + if (pr->id != -1) + per_cpu(processors, pr->id) = pr; +#endif result = acpi_processor_add_fs(device); if (result) goto end; - sysdev = get_cpu_sysdev(pr->id); - if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) - return -EFAULT; + if (pr->id != -1) { + sysdev = get_cpu_sysdev(pr->id); + if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) + return -EFAULT; + } /* _PDC call should be done before doing anything else (if reqd.). */ arch_acpi_processor_init_pdc(pr); acpi_processor_set_pdc(pr); arch_acpi_processor_cleanup_pdc(pr); -#ifdef CONFIG_CPU_FREQ +#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) acpi_processor_ppc_has_changed(pr); #endif - acpi_processor_get_throttling_info(pr); - acpi_processor_get_limit_info(pr); + /* + * pr->id may be -1 when processor_cntl_external() is enabled. + * The throttling and thermal modules don't support this case. + * For now, Tx only works when the dom0 vcpu count equals the + * pcpu count, as we give control to dom0. + */ + if (pr->id != -1) { + acpi_processor_get_throttling_info(pr); + acpi_processor_get_limit_info(pr); + } acpi_processor_power_init(pr, device); + result = processor_extcntl_prepare(pr); + if (result) + goto end; + pr->cdev = thermal_cooling_device_register("Processor", device, &processor_cooling_ops); if (IS_ERR(pr->cdev)) { @@ -878,7 +918,7 @@ pr = acpi_driver_data(device); - if (pr->id >= nr_cpu_ids) + if (!processor_cntl_external() && pr->id >= nr_cpu_ids) goto free; if (type == ACPI_BUS_REMOVAL_EJECT) { @@ -888,7 +928,8 @@ acpi_processor_power_exit(pr, device); - sysfs_remove_link(&device->dev.kobj, "sysdev"); + if (pr->id != -1) + sysfs_remove_link(&device->dev.kobj, "sysdev"); acpi_processor_remove_fs(device); @@ -899,8 +940,14 @@ pr->cdev = NULL; } +#ifndef CONFIG_XEN per_cpu(processors, pr->id) = NULL; per_cpu(processor_device_array, pr->id) = NULL; +#else + if (pr->id != -1) + per_cpu(processors, pr->id) = NULL; + processor_device_array[pr->acpi_id] = NULL; +#endif free: free_cpumask_var(pr->throttling.shared_cpu_map); @@ -963,6 +1010,10 @@ if (!pr) return -ENODEV; + if (processor_cntl_external()) + processor_notify_external(pr, + PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD); + if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) { kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE); } @@ -1002,6 +1053,10 @@ break; } + if (processor_cntl_external()) + processor_notify_external(pr, + PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD); + if (pr->id >= 0 && (pr->id < nr_cpu_ids)) { kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); break; } @@ -1033,6 +1088,11 @@ if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id))) kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); + + if (processor_cntl_external()) + processor_notify_external(pr, PROCESSOR_HOTPLUG, + HOTPLUG_TYPE_REMOVE); + break; default: ACPI_DEBUG_PRINT((ACPI_DB_INFO, @@ -1097,6 +1157,11 @@ static int acpi_processor_handle_eject(struct acpi_processor *pr) { +#ifdef CONFIG_XEN + if (pr->id == -1) + return (0); +#endif + + if
(cpu_online(pr->id)) cpu_down(pr->id); --- linux-ec2-2.6.31.orig/drivers/acpi/ac.c +++ linux-ec2-2.6.31/drivers/acpi/ac.c @@ -243,6 +243,7 @@ acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, (u32) ac->state); + acpi_notifier_call_chain(device, event, (u32) ac->state); #ifdef CONFIG_ACPI_SYSFS_POWER kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE); #endif --- linux-ec2-2.6.31.orig/drivers/acpi/processor_idle.c +++ linux-ec2-2.6.31/drivers/acpi/processor_idle.c @@ -104,6 +104,12 @@ /* Actually this shouldn't be __cpuinitdata, would be better to fix the callers to only run once -AK */ static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = { + { set_max_cstate, "IBM ThinkPad R40e", { + DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), + DMI_MATCH(DMI_BIOS_VERSION,"1SET")}, (void *)1}, + { set_max_cstate, "Medion 41700", { + DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), + DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1}, { set_max_cstate, "Clevo 5600D", { DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")}, @@ -112,6 +118,7 @@ }; +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL /* * Callers should disable interrupts before the call and enable * interrupts after return. @@ -130,6 +137,7 @@ } current_thread_info()->status |= TS_POLLING; } +#endif #ifdef ARCH_APICTIMER_STOPS_ON_C3 @@ -194,7 +202,7 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, struct acpi_processor_cx *cstate) { } static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { } -static void lapic_timer_state_broadcast(struct acpi_processor *pr, +static inline void lapic_timer_state_broadcast(struct acpi_processor *pr, struct acpi_processor_cx *cx, int broadcast) { @@ -242,7 +250,8 @@ return 0; } -#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) +#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) \ + && !defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) static void tsc_check_state(int state) { switch (boot_cpu_data.x86_vendor) { @@ -297,6 +306,17 @@ pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency; pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency; + /* + * FADT specified C2 latency must be less than or equal to + * 100 microseconds. + */ + if (acpi_gbl_FADT.C2latency > ACPI_PROCESSOR_MAX_C2_LATENCY) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "C2 latency too large [%d]\n", acpi_gbl_FADT.C2latency)); + /* invalidate C2 */ + pr->power.states[ACPI_STATE_C2].address = 0; + } + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "lvl2[0x%08x] lvl3[0x%08x]\n", pr->power.states[ACPI_STATE_C2].address, @@ -417,7 +437,8 @@ */ cx.entry_method = ACPI_CSTATE_HALT; snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); - } else { + /* This doesn't apply to external control case */ + } else if (!processor_pm_external()) { continue; } if (cx.type == ACPI_STATE_C1 && @@ -456,6 +477,12 @@ cx.power = obj->integer.value; +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL + /* cache control methods to notify external logic */ + if (processor_pm_external()) + memcpy(&cx.reg, reg, sizeof(*reg)); +#endif + current_count++; memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx)); @@ -493,22 +520,16 @@ return; /* - * C2 latency must be less than or equal to 100 - * microseconds. - */ - else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "latency too large [%d]\n", cx->latency)); - return; - } - - /* * Otherwise we've met all of our C2 requirements. 
* Normalize the C2 latency to expidite policy */ cx->valid = 1; +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL cx->latency_ticks = cx->latency; +#else + cx->latency_ticks = us_to_pm_timer_ticks(cx->latency); +#endif return; } @@ -591,7 +612,11 @@ */ cx->valid = 1; +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL cx->latency_ticks = cx->latency; +#else + cx->latency_ticks = us_to_pm_timer_ticks(cx->latency); +#endif /* * On older chipsets, BM_RLD needs to be set * in order for Bus Master activity to wake the @@ -665,6 +690,20 @@ pr->power.count = acpi_processor_power_verify(pr); +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL + /* + * Set Default Policy + * ------------------ + * Now that we know which states are supported, set the default + * policy. Note that this policy can be changed dynamically + * (e.g. encourage deeper sleeps to conserve battery life when + * not on AC). + */ + result = acpi_processor_set_power_policy(pr); + if (result) + return result; +#endif + /* * if one state of type C2 or C3 is available, mark this * CPU as being "idle manageable" @@ -761,6 +800,7 @@ }; +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL /** * acpi_idle_bm_check - checks if bus master activity was detected */ @@ -1126,6 +1166,13 @@ return 0; } +#else /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ +static inline int acpi_processor_setup_cpuidle(struct acpi_processor *pr) +{ + return 0; +} +#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ + int acpi_processor_cst_has_changed(struct acpi_processor *pr) { int ret = 0; @@ -1143,6 +1190,14 @@ if (!pr->flags.power_setup_done) return -ENODEV; + if (processor_pm_external()) { + pr->flags.power = 0; + ret = acpi_processor_get_power_info(pr); + processor_notify_external(pr, + PROCESSOR_PM_CHANGE, PM_TYPE_IDLE); + return ret; + } + cpuidle_pause_and_lock(); cpuidle_disable_device(&pr->power.dev); acpi_processor_get_power_info(pr); @@ -1183,6 +1238,10 @@ "ACPI: processor limited to max C-state %d\n", max_cstate); first_run++; +#if defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) && defined(CONFIG_SMP) + pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, + &acpi_processor_latency_notifier); +#endif } if (!pr) @@ -1225,6 +1284,11 @@ acpi_driver_data(device)); if (!entry) return -EIO; + + if (processor_pm_external()) + processor_notify_external(pr, + PROCESSOR_PM_INIT, PM_TYPE_IDLE); + return 0; } @@ -1241,5 +1305,12 @@ remove_proc_entry(ACPI_PROCESSOR_FILE_POWER, acpi_device_dir(device)); +#if defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) && defined(CONFIG_SMP) + /* Unregister the idle handler when processor #0 is removed. */ + if (pr->id == 0) + pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY, + &acpi_processor_latency_notifier); +#endif + return 0; } --- linux-ec2-2.6.31.orig/drivers/acpi/sleep.c +++ linux-ec2-2.6.31/drivers/acpi/sleep.c @@ -60,6 +60,7 @@ static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP +#ifndef CONFIG_ACPI_PV_SLEEP /* do we have a wakeup address for S2 and S3? */ if (acpi_state == ACPI_STATE_S3) { if (!acpi_wakeup_address) { return -EFAULT; } acpi_set_firmware_waking_vector( (acpi_physical_address)acpi_wakeup_address); } +#endif ACPI_FLUSH_CPU_CACHE(); acpi_enable_wakeup_device_prep(acpi_state); #endif @@ -244,7 +246,14 @@ break; case ACPI_STATE_S3: +#ifdef CONFIG_ACPI_PV_SLEEP + /* Hypervisor will save and restore CPU context + * and then we can skip low-level housekeeping here.
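+ * (In this configuration acpi_enter_sleep_state() is expected to end up + * in acpi_notify_hypervisor_state() in acpica/hwsleep.c below, so neither + * a wakeup trampoline nor do_suspend_lowlevel()'s register save/restore + * is needed in the guest.)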
+ */ + acpi_enter_sleep_state(acpi_state); +#else do_suspend_lowlevel(); +#endif break; } @@ -405,6 +414,46 @@ }, }, { + .callback = init_set_sci_en_on_resume, + .ident = "Hewlett-Packard HP Pavilion dv3 Notebook PC", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv3 Notebook PC"), + }, + }, + { + .callback = init_set_sci_en_on_resume, + .ident = "Hewlett-Packard Pavilion dv4", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4"), + }, + }, + { + .callback = init_set_sci_en_on_resume, + .ident = "Hewlett-Packard Pavilion dv7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv7"), + }, + }, + { + .callback = init_set_sci_en_on_resume, + .ident = "Hewlett-Packard Compaq Presario C700 Notebook PC", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "Compaq Presario C700 Notebook PC"), + }, + }, + { + .callback = init_set_sci_en_on_resume, + .ident = "Hewlett-Packard Compaq Presario CQ40 Notebook PC", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "Compaq Presario CQ40 Notebook PC"), + }, + }, + { .callback = init_old_suspend_ordering, .ident = "Panasonic CF51-2L", .matches = { --- linux-ec2-2.6.31.orig/drivers/acpi/Makefile +++ linux-ec2-2.6.31/drivers/acpi/Makefile @@ -61,3 +61,4 @@ processor-y := processor_core.o processor_throttling.o processor-y += processor_idle.o processor_thermal.o processor-$(CONFIG_CPU_FREQ) += processor_perflib.o +processor-$(CONFIG_PROCESSOR_EXTERNAL_CONTROL) += processor_perflib.o processor_extcntl.o --- linux-ec2-2.6.31.orig/drivers/acpi/acpica/hwsleep.c +++ linux-ec2-2.6.31/drivers/acpi/acpica/hwsleep.c @@ -235,7 +235,11 @@ u32 pm1b_control; struct acpi_bit_register_info *sleep_type_reg_info; struct acpi_bit_register_info *sleep_enable_reg_info; +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) u32 in_value; +#else + int err; +#endif struct acpi_object_list arg_list; union acpi_object arg; acpi_status status; @@ -344,6 +348,7 @@ /* Write #2: Write both SLP_TYP + SLP_EN */ +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86)) status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); @@ -383,6 +388,15 @@ /* Spin until we wake */ } while (!in_value); +#else + /* PV ACPI just needs to check the hypercall return value */ + err = acpi_notify_hypervisor_state(sleep_state, + pm1a_control, pm1b_control); + if (err) { + printk(KERN_ERR "ACPI: Hypervisor failure [%d]\n", err); + return_ACPI_STATUS(AE_ERROR); + } +#endif return_ACPI_STATUS(AE_OK); } @@ -401,6 +415,7 @@ * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED * ******************************************************************************/ +#ifndef CONFIG_XEN acpi_status asmlinkage acpi_enter_sleep_state_s4bios(void) { u32 in_value; @@ -454,6 +469,7 @@ } ACPI_EXPORT_SYMBOL(acpi_enter_sleep_state_s4bios) +#endif /******************************************************************************* * --- linux-ec2-2.6.31.orig/drivers/macintosh/windfarm_max6690_sensor.c +++ linux-ec2-2.6.31/drivers/macintosh/windfarm_max6690_sensor.c @@ -88,6 +88,8 @@ return rc; } +static struct i2c_driver wf_max6690_driver; + static struct i2c_client *wf_max6690_create(struct i2c_adapter *adapter, u8 addr, const char *loc) { @@ -119,7 +121,7 @@ * Let i2c-core delete that device on driver removal.
* This is safe because i2c-core holds the core_lock mutex for us. */ - list_add_tail(&client->detected, &client->driver->clients); + list_add_tail(&client->detected, &wf_max6690_driver.clients); return client; fail: --- linux-ec2-2.6.31.orig/drivers/macintosh/windfarm_smu_sat.c +++ linux-ec2-2.6.31/drivers/macintosh/windfarm_smu_sat.c @@ -194,6 +194,8 @@ .owner = THIS_MODULE, }; +static struct i2c_driver wf_sat_driver; + static void wf_sat_create(struct i2c_adapter *adapter, struct device_node *dev) { struct i2c_board_info info; @@ -222,7 +224,7 @@ * Let i2c-core delete that device on driver removal. * This is safe because i2c-core holds the core_lock mutex for us. */ - list_add_tail(&client->detected, &client->driver->clients); + list_add_tail(&client->detected, &wf_sat_driver.clients); } static int wf_sat_probe(struct i2c_client *client, --- linux-ec2-2.6.31.orig/drivers/macintosh/therm_adt746x.c +++ linux-ec2-2.6.31/drivers/macintosh/therm_adt746x.c @@ -79,6 +79,7 @@ u8 limits[3]; int last_speed[2]; int last_var[2]; + int pwm_inv[2]; }; static enum {ADT7460, ADT7467} therm_type; @@ -124,6 +125,8 @@ return data; } +static struct i2c_driver thermostat_driver; + static int attach_thermostat(struct i2c_adapter *adapter) { @@ -148,7 +151,7 @@ * Let i2c-core delete that device on driver removal. * This is safe because i2c-core holds the core_lock mutex for us. */ - list_add_tail(&client->detected, &client->driver->clients); + list_add_tail(&client->detected, &thermostat_driver.clients); return 0; } @@ -227,19 +230,23 @@ if (speed >= 0) { manual = read_reg(th, MANUAL_MODE[fan]); + manual &= ~INVERT_MASK; write_reg(th, MANUAL_MODE[fan], - (manual|MANUAL_MASK) & (~INVERT_MASK)); + manual | MANUAL_MASK | th->pwm_inv[fan]); write_reg(th, FAN_SPD_SET[fan], speed); } else { /* back to automatic */ if(therm_type == ADT7460) { manual = read_reg(th, MANUAL_MODE[fan]) & (~MANUAL_MASK); - + manual &= ~INVERT_MASK; + manual |= th->pwm_inv[fan]; write_reg(th, MANUAL_MODE[fan], manual|REM_CONTROL[fan]); } else { manual = read_reg(th, MANUAL_MODE[fan]); + manual &= ~INVERT_MASK; + manual |= th->pwm_inv[fan]; write_reg(th, MANUAL_MODE[fan], manual&(~AUTO_MASK)); } } @@ -416,6 +423,10 @@ thermostat = th; + /* record invert bit status because fw can corrupt it after suspend */ + th->pwm_inv[0] = read_reg(th, MANUAL_MODE[0]) & INVERT_MASK; + th->pwm_inv[1] = read_reg(th, MANUAL_MODE[1]) & INVERT_MASK; + /* be sure to really write fan speed the first time */ th->last_speed[0] = -2; th->last_speed[1] = -2; --- linux-ec2-2.6.31.orig/drivers/macintosh/therm_pm72.c +++ linux-ec2-2.6.31/drivers/macintosh/therm_pm72.c @@ -286,6 +286,8 @@ }, }; +static struct i2c_driver therm_pm72_driver; + /* * Utility function to create an i2c_client structure and * attach it to one of u3 adapters @@ -318,7 +320,7 @@ * Let i2c-core delete that device on driver removal. * This is safe because i2c-core holds the core_lock mutex for us. 
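+ * (Presumably clt->driver is not guaranteed to point at therm_pm72_driver + * this early in attachment, hence the driver's own clients list is + * referenced directly below rather than through the client.)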
*/ - list_add_tail(&clt->detected, &clt->driver->clients); + list_add_tail(&clt->detected, &therm_pm72_driver.clients); return clt; } --- linux-ec2-2.6.31.orig/drivers/macintosh/via-pmu.c +++ linux-ec2-2.6.31/drivers/macintosh/via-pmu.c @@ -405,7 +405,11 @@ printk(KERN_ERR "via-pmu: can't map interrupt\n"); return -ENODEV; } - if (request_irq(irq, via_pmu_interrupt, 0, "VIA-PMU", (void *)0)) { + /* We set IRQF_TIMER because we don't want the interrupt to be disabled + * between the 2 passes of driver suspend; we control our own disabling + * for that one + */ + if (request_irq(irq, via_pmu_interrupt, IRQF_TIMER, "VIA-PMU", (void *)0)) { printk(KERN_ERR "via-pmu: can't request irq %d\n", irq); return -ENODEV; } @@ -419,7 +423,7 @@ gpio_irq = irq_of_parse_and_map(gpio_node, 0); if (gpio_irq != NO_IRQ) { - if (request_irq(gpio_irq, gpio1_interrupt, 0, + if (request_irq(gpio_irq, gpio1_interrupt, IRQF_TIMER, "GPIO1 ADB", (void *)0)) printk(KERN_ERR "pmu: can't get irq %d" " (GPIO1)\n", gpio_irq); @@ -925,8 +929,7 @@ #ifdef CONFIG_ADB /* Send an ADB command */ -static int -pmu_send_request(struct adb_request *req, int sync) +static int pmu_send_request(struct adb_request *req, int sync) { int i, ret; @@ -1005,16 +1008,11 @@ } /* Enable/disable autopolling */ -static int -pmu_adb_autopoll(int devs) +static int __pmu_adb_autopoll(int devs) { struct adb_request req; - if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) - return -ENXIO; - if (devs) { - adb_dev_map = devs; pmu_request(&req, NULL, 5, PMU_ADB_CMD, 0, 0x86, adb_dev_map >> 8, adb_dev_map); pmu_adb_flags = 2; @@ -1027,9 +1025,17 @@ return 0; } +static int pmu_adb_autopoll(int devs) +{ + if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) + return -ENXIO; + + adb_dev_map = devs; + return __pmu_adb_autopoll(devs); +} + /* Reset the ADB bus */ -static int -pmu_adb_reset_bus(void) +static int pmu_adb_reset_bus(void) { struct adb_request req; int save_autopoll = adb_dev_map; @@ -1038,13 +1044,13 @@ return -ENXIO; /* anyone got a better idea?? */ - pmu_adb_autopoll(0); + __pmu_adb_autopoll(0); - req.nbytes = 5; + req.nbytes = 4; req.done = NULL; req.data[0] = PMU_ADB_CMD; - req.data[1] = 0; - req.data[2] = ADB_BUSRESET; + req.data[1] = ADB_BUSRESET; + req.data[2] = 0; req.data[3] = 0; req.data[4] = 0; req.reply_len = 0; @@ -1056,7 +1062,7 @@ pmu_wait_complete(&req); if (save_autopoll != 0) - pmu_adb_autopoll(save_autopoll); + __pmu_adb_autopoll(save_autopoll); return 0; } --- linux-ec2-2.6.31.orig/drivers/macintosh/windfarm_lm75_sensor.c +++ linux-ec2-2.6.31/drivers/macintosh/windfarm_lm75_sensor.c @@ -115,6 +115,8 @@ return rc; } +static struct i2c_driver wf_lm75_driver; + static struct i2c_client *wf_lm75_create(struct i2c_adapter *adapter, u8 addr, int ds1775, const char *loc) @@ -157,7 +159,7 @@ * Let i2c-core delete that device on driver removal. * This is safe because i2c-core holds the core_lock mutex for us. */ - list_add_tail(&client->detected, &client->driver->clients); + list_add_tail(&client->detected, &wf_lm75_driver.clients); return client; fail: return NULL; --- linux-ec2-2.6.31.orig/drivers/usb/mon/mon_bin.c +++ linux-ec2-2.6.31/drivers/usb/mon/mon_bin.c @@ -350,12 +350,12 @@ /* * Return a few (kilo-)bytes to the head of the buffer. - * This is used if a DMA fetch fails. + * This is used if a data fetch fails.
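+ * Callers are expected to pass a PKT_ALIGN multiple; (x + PKT_ALIGN-1) & + * ~(PKT_ALIGN-1) rounds x up to the next PKT_ALIGN boundary, e.g. 13 + * becomes 64 for a 64-byte alignment, keeping b_cnt and b_in aligned.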
*/ static void mon_buff_area_shrink(struct mon_reader_bin *rp, unsigned int size) { - size = (size + PKT_ALIGN-1) & ~(PKT_ALIGN-1); + /* size &= ~(PKT_ALIGN-1); -- we're called with aligned size */ rp->b_cnt -= size; if (rp->b_in < size) rp->b_in += rp->b_size; @@ -442,6 +442,7 @@ unsigned int urb_length; unsigned int offset; unsigned int length; + unsigned int delta; unsigned int ndesc, lendesc; unsigned char dir; struct mon_bin_hdr *ep; @@ -546,8 +547,10 @@ if (length != 0) { ep->flag_data = mon_bin_get_data(rp, offset, urb, length); if (ep->flag_data != 0) { /* Yes, it's 0x00, not '0' */ - ep->len_cap = 0; - mon_buff_area_shrink(rp, length); + delta = (ep->len_cap + PKT_ALIGN-1) & ~(PKT_ALIGN-1); + ep->len_cap -= length; + delta -= (ep->len_cap + PKT_ALIGN-1) & ~(PKT_ALIGN-1); + mon_buff_area_shrink(rp, delta); } } else { ep->flag_data = data_tag; --- linux-ec2-2.6.31.orig/drivers/usb/gadget/amd5536udc.c +++ linux-ec2-2.6.31/drivers/usb/gadget/amd5536udc.c @@ -1213,7 +1213,12 @@ tmp &= AMD_UNMASK_BIT(ep->num); writel(tmp, &dev->regs->ep_irqmsk); } - } + } else if (ep->in) { + /* enable ep irq */ + tmp = readl(&dev->regs->ep_irqmsk); + tmp &= AMD_UNMASK_BIT(ep->num); + writel(tmp, &dev->regs->ep_irqmsk); + } } else if (ep->dma) { @@ -2005,18 +2010,17 @@ { int tmp; - /* empty queues and init hardware */ - udc_basic_init(dev); - for (tmp = 0; tmp < UDC_EP_NUM; tmp++) { - empty_req_queue(&dev->ep[tmp]); - } - if (dev->gadget.speed != USB_SPEED_UNKNOWN) { spin_unlock(&dev->lock); driver->disconnect(&dev->gadget); spin_lock(&dev->lock); } - /* init */ + + /* empty queues and init hardware */ + udc_basic_init(dev); + for (tmp = 0; tmp < UDC_EP_NUM; tmp++) + empty_req_queue(&dev->ep[tmp]); + udc_setup_endpoints(dev); } @@ -2478,6 +2482,13 @@ } } + } else if (!use_dma && ep->in) { + /* disable interrupt */ + tmp = readl( + &dev->regs->ep_irqmsk); + tmp |= AMD_BIT(ep->num); + writel(tmp, + &dev->regs->ep_irqmsk); } } /* clear status bits */ @@ -3285,6 +3296,17 @@ goto finished; } + spin_lock_init(&dev->lock); + /* udc csr registers base */ + dev->csr = dev->virt_addr + UDC_CSR_ADDR; + /* dev registers base */ + dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; + /* ep registers base */ + dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; + /* fifo's base */ + dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); + dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); + if (request_irq(pdev->irq, udc_irq, IRQF_SHARED, name, dev) != 0) { dev_dbg(&dev->pdev->dev, "request_irq(%d) fail\n", pdev->irq); kfree(dev); @@ -3337,7 +3359,6 @@ udc_pollstall_timer.data = 0; /* device struct setup */ - spin_lock_init(&dev->lock); dev->gadget.ops = &udc_ops; dev_set_name(&dev->gadget.dev, "gadget"); @@ -3346,16 +3367,6 @@ dev->gadget.name = name; dev->gadget.is_dualspeed = 1; - /* udc csr registers base */ - dev->csr = dev->virt_addr + UDC_CSR_ADDR; - /* dev registers base */ - dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; - /* ep registers base */ - dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; - /* fifo's base */ - dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); - dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); - /* init registers, interrupts, ... 
*/ startup_registers(dev); --- linux-ec2-2.6.31.orig/drivers/usb/host/xhci-hcd.c +++ linux-ec2-2.6.31/drivers/usb/host/xhci-hcd.c @@ -22,12 +22,18 @@ #include <linux/irq.h> #include <linux/module.h> +#include <linux/moduleparam.h> #include "xhci.h" #define DRIVER_AUTHOR "Sarah Sharp" #define DRIVER_DESC "'eXtensible' Host Controller (xHC) Driver" +/* Some 0.95 hardware can't handle the chain bit on a Link TRB being cleared */ +static int link_quirk; +module_param(link_quirk, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(link_quirk, "Don't clear the chain bit on a link TRB"); + /* TODO: copied from ehci-hcd.c - can this be refactored? */ /* * handshake - spin reading hc until handshake completes or fails @@ -214,6 +220,12 @@ xhci_dbg(xhci, "xhci_init\n"); spin_lock_init(&xhci->lock); + if (link_quirk) { + xhci_dbg(xhci, "QUIRK: Not clearing Link TRB chain bits.\n"); + xhci->quirks |= XHCI_LINK_TRB_QUIRK; + } else { + xhci_dbg(xhci, "xHCI doesn't need link TRB QUIRK\n"); + } retval = xhci_mem_init(xhci, GFP_KERNEL); xhci_dbg(xhci, "Finished xhci_init\n"); @@ -555,13 +567,22 @@ return 1 << (xhci_get_endpoint_index(desc) + 1); } +/* Find the flag for this endpoint (for use in the control context). Use the + * endpoint index to create a bitmask. The slot context is bit 0, endpoint 0 is + * bit 1, etc. + */ +unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index) +{ + return 1 << (ep_index + 1); +} + /* Compute the last valid endpoint context index. Basically, this is the * endpoint index plus one. For slot contexts with more than valid endpoint, * we find the most significant bit set in the added contexts flags. * e.g. ep 1 IN (with epnum 0x81) => added_ctxs = 0b1000 * fls(0b1000) = 4, but the endpoint context index is 3, so subtract one. */ -static inline unsigned int xhci_last_valid_endpoint(u32 added_ctxs) +unsigned int xhci_last_valid_endpoint(u32 added_ctxs) { return fls(added_ctxs) - 1; } @@ -589,6 +610,70 @@ return 1; } +static int xhci_configure_endpoint(struct xhci_hcd *xhci, + struct usb_device *udev, struct xhci_virt_device *virt_dev, + bool ctx_change); + +/* + * Full speed devices may have a max packet size greater than 8 bytes, but the + * USB core doesn't know that until it reads the first 8 bytes of the + * descriptor. If the usb_device's max packet size changes after that point, + * we need to issue an evaluate context command and wait on it.
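+ * Example (hypothetical device): ep0 is provisionally configured for + * 8-byte packets, the first 8 descriptor bytes then report a + * bMaxPacketSize0 of 64, and an Evaluate Context command updates the ep0 + * context to 64 before any further control transfers are queued.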
+ */ +static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id, + unsigned int ep_index, struct urb *urb) +{ + struct xhci_container_ctx *in_ctx; + struct xhci_container_ctx *out_ctx; + struct xhci_input_control_ctx *ctrl_ctx; + struct xhci_ep_ctx *ep_ctx; + int max_packet_size; + int hw_max_packet_size; + int ret = 0; + + out_ctx = xhci->devs[slot_id]->out_ctx; + ep_ctx = xhci_get_ep_ctx(xhci, out_ctx, ep_index); + hw_max_packet_size = MAX_PACKET_DECODED(ep_ctx->ep_info2); + max_packet_size = urb->dev->ep0.desc.wMaxPacketSize; + if (hw_max_packet_size != max_packet_size) { + xhci_dbg(xhci, "Max Packet Size for ep 0 changed.\n"); + xhci_dbg(xhci, "Max packet size in usb_device = %d\n", + max_packet_size); + xhci_dbg(xhci, "Max packet size in xHCI HW = %d\n", + hw_max_packet_size); + xhci_dbg(xhci, "Issuing evaluate context command.\n"); + + /* Set up the modified control endpoint 0 */ + xhci_endpoint_copy(xhci, xhci->devs[slot_id], ep_index); + in_ctx = xhci->devs[slot_id]->in_ctx; + ep_ctx = xhci_get_ep_ctx(xhci, in_ctx, ep_index); + ep_ctx->ep_info2 &= ~MAX_PACKET_MASK; + ep_ctx->ep_info2 |= MAX_PACKET(max_packet_size); + + /* Set up the input context flags for the command */ + /* FIXME: This won't work if a non-default control endpoint + * changes max packet sizes. + */ + ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx); + ctrl_ctx->add_flags = EP0_FLAG; + ctrl_ctx->drop_flags = 0; + + xhci_dbg(xhci, "Slot %d input context\n", slot_id); + xhci_dbg_ctx(xhci, in_ctx, ep_index); + xhci_dbg(xhci, "Slot %d output context\n", slot_id); + xhci_dbg_ctx(xhci, out_ctx, ep_index); + + ret = xhci_configure_endpoint(xhci, urb->dev, + xhci->devs[slot_id], true); + + /* Clean up the input context for later use by bandwidth + * functions. + */ + ctrl_ctx->add_flags = SLOT_FLAG; + } + return ret; +} + /* * non-error returns are a promise to giveback() the urb later * we drop ownership so next owner (or urb unlink) can get it @@ -600,13 +685,13 @@ int ret = 0; unsigned int slot_id, ep_index; + if (!urb || xhci_check_args(hcd, urb->dev, urb->ep, true, __func__) <= 0) return -EINVAL; slot_id = urb->dev->slot_id; ep_index = xhci_get_endpoint_index(&urb->ep->desc); - spin_lock_irqsave(&xhci->lock, flags); if (!xhci->devs || !xhci->devs[slot_id]) { if (!in_interrupt()) dev_warn(&urb->dev->dev, "WARN: urb submitted for dev with no Slot ID\n"); @@ -619,19 +704,38 @@ ret = -ESHUTDOWN; goto exit; } - if (usb_endpoint_xfer_control(&urb->ep->desc)) + if (usb_endpoint_xfer_control(&urb->ep->desc)) { + /* Check to see if the max packet size for the default control + * endpoint changed during FS device enumeration + */ + if (urb->dev->speed == USB_SPEED_FULL) { + ret = xhci_check_maxpacket(xhci, slot_id, + ep_index, urb); + if (ret < 0) + return ret; + } + /* We have a spinlock and interrupts disabled, so we must pass * atomic context to this function, which may allocate memory. 
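+ * (GFP_ATOMIC is what makes this safe; a GFP_KERNEL allocation could + * sleep while xhci->lock is held with interrupts off.)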
*/ + spin_lock_irqsave(&xhci->lock, flags); ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); - else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) + spin_unlock_irqrestore(&xhci->lock, flags); + } else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) { + spin_lock_irqsave(&xhci->lock, flags); ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); - else + spin_unlock_irqrestore(&xhci->lock, flags); + } else if (usb_endpoint_xfer_int(&urb->ep->desc)) { + spin_lock_irqsave(&xhci->lock, flags); + ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb, + slot_id, ep_index); + spin_unlock_irqrestore(&xhci->lock, flags); + } else { ret = -EINVAL; + } exit: - spin_unlock_irqrestore(&xhci->lock, flags); return ret; } @@ -930,6 +1034,122 @@ } } +static int xhci_configure_endpoint_result(struct xhci_hcd *xhci, + struct usb_device *udev, struct xhci_virt_device *virt_dev) +{ + int ret; + + switch (virt_dev->cmd_status) { + case COMP_ENOMEM: + dev_warn(&udev->dev, "Not enough host controller resources " + "for new device state.\n"); + ret = -ENOMEM; + /* FIXME: can we allocate more resources for the HC? */ + break; + case COMP_BW_ERR: + dev_warn(&udev->dev, "Not enough bandwidth " + "for new device state.\n"); + ret = -ENOSPC; + /* FIXME: can we go back to the old state? */ + break; + case COMP_TRB_ERR: + /* the HCD set up something wrong */ + dev_warn(&udev->dev, "ERROR: Endpoint drop flag = 0, " + "add flag = 1, " + "and endpoint is not disabled.\n"); + ret = -EINVAL; + break; + case COMP_SUCCESS: + dev_dbg(&udev->dev, "Successful Endpoint Configure command\n"); + ret = 0; + break; + default: + xhci_err(xhci, "ERROR: unexpected command completion " + "code 0x%x.\n", virt_dev->cmd_status); + ret = -EINVAL; + break; + } + return ret; +} + +static int xhci_evaluate_context_result(struct xhci_hcd *xhci, + struct usb_device *udev, struct xhci_virt_device *virt_dev) +{ + int ret; + + switch (virt_dev->cmd_status) { + case COMP_EINVAL: + dev_warn(&udev->dev, "WARN: xHCI driver setup invalid evaluate " + "context command.\n"); + ret = -EINVAL; + break; + case COMP_EBADSLT: + dev_warn(&udev->dev, "WARN: slot not enabled for " + "evaluate context command.\n"); + /* fall through */ + case COMP_CTX_STATE: + dev_warn(&udev->dev, "WARN: invalid context state for " + "evaluate context command.\n"); + xhci_dbg_ctx(xhci, virt_dev->out_ctx, 1); + ret = -EINVAL; + break; + case COMP_SUCCESS: + dev_dbg(&udev->dev, "Successful evaluate context command\n"); + ret = 0; + break; + default: + xhci_err(xhci, "ERROR: unexpected command completion " + "code 0x%x.\n", virt_dev->cmd_status); + ret = -EINVAL; + break; + } + return ret; +} + +/* Issue a configure endpoint command or evaluate context command + * and wait for it to finish.
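+ * Rough sequence, as implemented below: queue the command TRB, ring the + * host controller doorbell, then sleep on virt_dev->cmd_completion until + * the command completion event arrives or the wait times out.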
+ */ +static int xhci_configure_endpoint(struct xhci_hcd *xhci, + struct usb_device *udev, struct xhci_virt_device *virt_dev, + bool ctx_change) +{ + int ret; + int timeleft; + unsigned long flags; + + spin_lock_irqsave(&xhci->lock, flags); + if (!ctx_change) + ret = xhci_queue_configure_endpoint(xhci, virt_dev->in_ctx->dma, + udev->slot_id); + else + ret = xhci_queue_evaluate_context(xhci, virt_dev->in_ctx->dma, + udev->slot_id); + if (ret < 0) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_dbg(xhci, "FIXME allocate a new ring segment\n"); + return -ENOMEM; + } + xhci_ring_cmd_db(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); + + /* Wait for the configure endpoint command to complete */ + timeleft = wait_for_completion_interruptible_timeout( + &virt_dev->cmd_completion, + USB_CTRL_SET_TIMEOUT); + if (timeleft <= 0) { + xhci_warn(xhci, "%s while waiting for %s command\n", + timeleft == 0 ? "Timeout" : "Signal", + ctx_change == 0 ? + "configure endpoint" : + "evaluate context"); + /* FIXME cancel the configure endpoint command */ + return -ETIME; + } + + if (!ctx_change) + return xhci_configure_endpoint_result(xhci, udev, virt_dev); + return xhci_evaluate_context_result(xhci, udev, virt_dev); +} + /* Called after one or more calls to xhci_add_endpoint() or * xhci_drop_endpoint(). If this call fails, the USB core is expected * to call xhci_reset_bandwidth(). @@ -944,8 +1164,6 @@ { int i; int ret = 0; - int timeleft; - unsigned long flags; struct xhci_hcd *xhci; struct xhci_virt_device *virt_dev; struct xhci_input_control_ctx *ctrl_ctx; @@ -975,56 +1193,7 @@ xhci_dbg_ctx(xhci, virt_dev->in_ctx, LAST_CTX_TO_EP_NUM(slot_ctx->dev_info)); - spin_lock_irqsave(&xhci->lock, flags); - ret = xhci_queue_configure_endpoint(xhci, virt_dev->in_ctx->dma, - udev->slot_id); - if (ret < 0) { - spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg(xhci, "FIXME allocate a new ring segment\n"); - return -ENOMEM; - } - xhci_ring_cmd_db(xhci); - spin_unlock_irqrestore(&xhci->lock, flags); - - /* Wait for the configure endpoint command to complete */ - timeleft = wait_for_completion_interruptible_timeout( - &virt_dev->cmd_completion, - USB_CTRL_SET_TIMEOUT); - if (timeleft <= 0) { - xhci_warn(xhci, "%s while waiting for configure endpoint command\n", - timeleft == 0 ? "Timeout" : "Signal"); - /* FIXME cancel the configure endpoint command */ - return -ETIME; - } - - switch (virt_dev->cmd_status) { - case COMP_ENOMEM: - dev_warn(&udev->dev, "Not enough host controller resources " - "for new device state.\n"); - ret = -ENOMEM; - /* FIXME: can we allocate more resources for the HC? */ - break; - case COMP_BW_ERR: - dev_warn(&udev->dev, "Not enough bandwidth " - "for new device state.\n"); - ret = -ENOSPC; - /* FIXME: can we go back to the old state? 
*/ - break; - case COMP_TRB_ERR: - /* the HCD set up something wrong */ - dev_warn(&udev->dev, "ERROR: Endpoint drop flag = 0, add flag = 1, " - "and endpoint is not disabled.\n"); - ret = -EINVAL; - break; - case COMP_SUCCESS: - dev_dbg(&udev->dev, "Successful Endpoint Configure command\n"); - break; - default: - xhci_err(xhci, "ERROR: unexpected command completion " - "code 0x%x.\n", virt_dev->cmd_status); - ret = -EINVAL; - break; - } + ret = xhci_configure_endpoint(xhci, udev, virt_dev, false); if (ret) { /* Callee should call reset_bandwidth() */ return ret; @@ -1075,6 +1244,75 @@ xhci_zero_in_ctx(xhci, virt_dev); } +void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci, + unsigned int slot_id, unsigned int ep_index, + struct xhci_dequeue_state *deq_state) +{ + struct xhci_container_ctx *in_ctx; + struct xhci_input_control_ctx *ctrl_ctx; + struct xhci_ep_ctx *ep_ctx; + u32 added_ctxs; + dma_addr_t addr; + + xhci_endpoint_copy(xhci, xhci->devs[slot_id], ep_index); + in_ctx = xhci->devs[slot_id]->in_ctx; + ep_ctx = xhci_get_ep_ctx(xhci, in_ctx, ep_index); + addr = xhci_trb_virt_to_dma(deq_state->new_deq_seg, + deq_state->new_deq_ptr); + if (addr == 0) { + xhci_warn(xhci, "WARN Cannot submit config ep after " + "reset ep command\n"); + xhci_warn(xhci, "WARN deq seg = %p, deq ptr = %p\n", + deq_state->new_deq_seg, + deq_state->new_deq_ptr); + return; + } + ep_ctx->deq = addr | deq_state->new_cycle_state; + + xhci_slot_copy(xhci, xhci->devs[slot_id]); + + ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx); + added_ctxs = xhci_get_endpoint_flag_from_index(ep_index); + ctrl_ctx->add_flags = added_ctxs | SLOT_FLAG; + ctrl_ctx->drop_flags = added_ctxs; + + xhci_dbg(xhci, "Slot ID %d Input Context:\n", slot_id); + xhci_dbg_ctx(xhci, in_ctx, ep_index); +} + +void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, + struct usb_device *udev, + unsigned int ep_index, struct xhci_ring *ep_ring) +{ + struct xhci_dequeue_state deq_state; + + xhci_dbg(xhci, "Cleaning up stalled endpoint ring\n"); + /* We need to move the HW's dequeue pointer past this TD, + * or it will attempt to resend it on the next doorbell ring. + */ + xhci_find_new_dequeue_state(xhci, udev->slot_id, + ep_index, ep_ring->stopped_td, + &deq_state); + + /* HW with the reset endpoint quirk will use the saved dequeue state to + * issue a configure endpoint command later. + */ + if (!(xhci->quirks & XHCI_RESET_EP_QUIRK)) { + xhci_dbg(xhci, "Queueing new dequeue state\n"); + xhci_queue_new_dequeue_state(xhci, ep_ring, + udev->slot_id, + ep_index, &deq_state); + } else { + /* Better hope no one uses the input context between now and the + * reset endpoint completion! + */ + xhci_dbg(xhci, "Setting up input context for " + "configure endpoint command\n"); + xhci_setup_input_ctx_for_quirk(xhci, udev->slot_id, + ep_index, &deq_state); + } +} + /* Deal with stalled endpoints. The core should have sent the control message * to clear the halt condition. However, we need to make the xHCI hardware * reset its sequence number, since a device will expect a sequence number of @@ -1089,7 +1327,6 @@ unsigned int ep_index; unsigned long flags; int ret; - struct xhci_dequeue_state deq_state; struct xhci_ring *ep_ring; xhci = hcd_to_xhci(hcd); @@ -1106,6 +1343,10 @@ ep->desc.bEndpointAddress); return; } + if (usb_endpoint_xfer_control(&ep->desc)) { + xhci_dbg(xhci, "Control endpoint stall already handled.\n"); + return; + } xhci_dbg(xhci, "Queueing reset endpoint command\n"); spin_lock_irqsave(&xhci->lock, flags); @@ -1116,16 +1357,7 @@ * command. 
Better hope that last command worked! */ if (!ret) { - xhci_dbg(xhci, "Cleaning up stalled endpoint ring\n"); - /* We need to move the HW's dequeue pointer past this TD, - * or it will attempt to resend it on the next doorbell ring. - */ - xhci_find_new_dequeue_state(xhci, udev->slot_id, - ep_index, ep_ring->stopped_td, &deq_state); - xhci_dbg(xhci, "Queueing new dequeue state\n"); - xhci_queue_new_dequeue_state(xhci, ep_ring, - udev->slot_id, - ep_index, &deq_state); + xhci_cleanup_stalled_ring(xhci, udev, ep_index, ep_ring); kfree(ep_ring->stopped_td); xhci_ring_cmd_db(xhci); } --- linux-ec2-2.6.31.orig/drivers/usb/host/xhci-mem.c +++ linux-ec2-2.6.31/drivers/usb/host/xhci-mem.c @@ -94,6 +94,9 @@ val = prev->trbs[TRBS_PER_SEGMENT-1].link.control; val &= ~TRB_TYPE_BITMASK; val |= TRB_TYPE(TRB_LINK); + /* Always set the chain bit with 0.95 hardware */ + if (xhci_link_trb_quirk(xhci)) + val |= TRB_CHAIN; prev->trbs[TRBS_PER_SEGMENT-1].link.control = val; } xhci_dbg(xhci, "Linking segment 0x%llx to segment 0x%llx (DMA)\n", @@ -398,15 +401,28 @@ /* Step 5 */ ep0_ctx->ep_info2 = EP_TYPE(CTRL_EP); /* - * See section 4.3 bullet 6: - * The default Max Packet size for ep0 is "8 bytes for a USB2 - * LS/FS/HS device or 512 bytes for a USB3 SS device" * XXX: Not sure about wireless USB devices. */ - if (udev->speed == USB_SPEED_SUPER) + switch (udev->speed) { + case USB_SPEED_SUPER: ep0_ctx->ep_info2 |= MAX_PACKET(512); - else + break; + case USB_SPEED_HIGH: + /* USB core guesses at a 64-byte max packet first for FS devices */ + case USB_SPEED_FULL: + ep0_ctx->ep_info2 |= MAX_PACKET(64); + break; + case USB_SPEED_LOW: ep0_ctx->ep_info2 |= MAX_PACKET(8); + break; + case USB_SPEED_VARIABLE: + xhci_dbg(xhci, "FIXME xHCI doesn't support wireless speeds\n"); + return -EINVAL; + break; + default: + /* New speed? */ + BUG(); + } /* EP 0 can handle "burst" sizes of 1, so Max Burst Size field is 0 */ ep0_ctx->ep_info2 |= MAX_BURST(0); ep0_ctx->ep_info2 |= ERROR_COUNT(3); @@ -598,6 +614,44 @@ */ } +/* Copy output xhci_ep_ctx to the input xhci_ep_ctx copy. + * Useful when you want to change one particular aspect of the endpoint and then + * issue a configure endpoint command. + */ +void xhci_endpoint_copy(struct xhci_hcd *xhci, + struct xhci_virt_device *vdev, unsigned int ep_index) +{ + struct xhci_ep_ctx *out_ep_ctx; + struct xhci_ep_ctx *in_ep_ctx; + + out_ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index); + in_ep_ctx = xhci_get_ep_ctx(xhci, vdev->in_ctx, ep_index); + + in_ep_ctx->ep_info = out_ep_ctx->ep_info; + in_ep_ctx->ep_info2 = out_ep_ctx->ep_info2; + in_ep_ctx->deq = out_ep_ctx->deq; + in_ep_ctx->tx_info = out_ep_ctx->tx_info; +} + +/* Copy output xhci_slot_ctx to the input xhci_slot_ctx. + * Useful when you want to change one particular aspect of the endpoint and then + * issue a configure endpoint command. Only the context entries field matters, + * but we'll copy the whole thing anyway. + */ +void xhci_slot_copy(struct xhci_hcd *xhci, struct xhci_virt_device *vdev) +{ + struct xhci_slot_ctx *in_slot_ctx; + struct xhci_slot_ctx *out_slot_ctx; + + in_slot_ctx = xhci_get_slot_ctx(xhci, vdev->in_ctx); + out_slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx); + + in_slot_ctx->dev_info = out_slot_ctx->dev_info; + in_slot_ctx->dev_info2 = out_slot_ctx->dev_info2; + in_slot_ctx->tt_info = out_slot_ctx->tt_info; + in_slot_ctx->dev_state = out_slot_ctx->dev_state; +} + /* Set up the scratchpad buffer array and scratchpad buffers, if needed. 
*/ static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags) { @@ -702,9 +756,11 @@ int i; /* Free the Event Ring Segment Table and the actual Event Ring */ - xhci_writel(xhci, 0, &xhci->ir_set->erst_size); - xhci_write_64(xhci, 0, &xhci->ir_set->erst_base); - xhci_write_64(xhci, 0, &xhci->ir_set->erst_dequeue); + if (xhci->ir_set) { + xhci_writel(xhci, 0, &xhci->ir_set->erst_size); + xhci_write_64(xhci, 0, &xhci->ir_set->erst_base); + xhci_write_64(xhci, 0, &xhci->ir_set->erst_dequeue); + } size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); if (xhci->erst.entries) pci_free_consistent(pdev, size, @@ -741,9 +797,9 @@ xhci->dcbaa, xhci->dcbaa->dma); xhci->dcbaa = NULL; + scratchpad_free(xhci); xhci->page_size = 0; xhci->page_shift = 0; - scratchpad_free(xhci); } int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) --- linux-ec2-2.6.31.orig/drivers/usb/host/ohci-hcd.c +++ linux-ec2-2.6.31/drivers/usb/host/ohci-hcd.c @@ -88,6 +88,7 @@ #ifdef CONFIG_PCI static void quirk_amd_pll(int state); static void amd_iso_dev_put(void); +static void sb800_prefetch(struct ohci_hcd *ohci, int on); #else static inline void quirk_amd_pll(int state) { @@ -97,6 +98,10 @@ { return; } +static inline void sb800_prefetch(struct ohci_hcd *ohci, int on) +{ + return; +} #endif --- linux-ec2-2.6.31.orig/drivers/usb/host/ohci-pci.c +++ linux-ec2-2.6.31/drivers/usb/host/ohci-pci.c @@ -177,6 +177,13 @@ return 0; pci_read_config_byte(amd_smbus_dev, PCI_REVISION_ID, &rev); + + /* SB800 needs pre-fetch fix */ + if ((rev >= 0x40) && (rev <= 0x4f)) { + ohci->flags |= OHCI_QUIRK_AMD_PREFETCH; + ohci_dbg(ohci, "enabled AMD prefetch quirk\n"); + } + if ((rev > 0x3b) || (rev < 0x30)) { pci_dev_put(amd_smbus_dev); amd_smbus_dev = NULL; @@ -262,6 +269,19 @@ } +static void sb800_prefetch(struct ohci_hcd *ohci, int on) +{ + struct pci_dev *pdev; + u16 misc; + + pdev = to_pci_dev(ohci_to_hcd(ohci)->self.controller); + pci_read_config_word(pdev, 0x50, &misc); + if (on == 0) + pci_write_config_word(pdev, 0x50, misc & 0xfcff); + else + pci_write_config_word(pdev, 0x50, misc | 0x0300); +} + /* List of quirks for OHCI */ static const struct pci_device_id ohci_pci_quirks[] = { { --- linux-ec2-2.6.31.orig/drivers/usb/host/ohci-q.c +++ linux-ec2-2.6.31/drivers/usb/host/ohci-q.c @@ -49,9 +49,12 @@ switch (usb_pipetype (urb->pipe)) { case PIPE_ISOCHRONOUS: ohci_to_hcd(ohci)->self.bandwidth_isoc_reqs--; - if (ohci_to_hcd(ohci)->self.bandwidth_isoc_reqs == 0 - && quirk_amdiso(ohci)) - quirk_amd_pll(1); + if (ohci_to_hcd(ohci)->self.bandwidth_isoc_reqs == 0) { + if (quirk_amdiso(ohci)) + quirk_amd_pll(1); + if (quirk_amdprefetch(ohci)) + sb800_prefetch(ohci, 0); + } break; case PIPE_INTERRUPT: ohci_to_hcd(ohci)->self.bandwidth_int_reqs--; @@ -680,9 +683,12 @@ data + urb->iso_frame_desc [cnt].offset, urb->iso_frame_desc [cnt].length, urb, cnt); } - if (ohci_to_hcd(ohci)->self.bandwidth_isoc_reqs == 0 - && quirk_amdiso(ohci)) - quirk_amd_pll(0); + if (ohci_to_hcd(ohci)->self.bandwidth_isoc_reqs == 0) { + if (quirk_amdiso(ohci)) + quirk_amd_pll(0); + if (quirk_amdprefetch(ohci)) + sb800_prefetch(ohci, 1); + } periodic = ohci_to_hcd(ohci)->self.bandwidth_isoc_reqs++ == 0 && ohci_to_hcd(ohci)->self.bandwidth_int_reqs == 0; break; --- linux-ec2-2.6.31.orig/drivers/usb/host/ehci-sched.c +++ linux-ec2-2.6.31/drivers/usb/host/ehci-sched.c @@ -456,6 +456,8 @@ /* make sure ehci_work scans these */ ehci->next_uframe = ehci_readl(ehci, &ehci->regs->frame_index) % (ehci->periodic_size << 3); + if (unlikely(ehci->broken_periodic)) + 
ehci->last_periodic_enable = ktime_get_real(); return 0; } @@ -467,6 +469,16 @@ if (--ehci->periodic_sched) return 0; + if (unlikely(ehci->broken_periodic)) { + /* delay experimentally determined */ + ktime_t safe = ktime_add_us(ehci->last_periodic_enable, 1000); + ktime_t now = ktime_get_real(); + s64 delay = ktime_us_delta(safe, now); + + if (unlikely(delay > 0)) + udelay(delay); + } + /* did setting PSE not take effect yet? * takes effect only at frame boundaries... */ --- linux-ec2-2.6.31.orig/drivers/usb/host/ehci-pci.c +++ linux-ec2-2.6.31/drivers/usb/host/ehci-pci.c @@ -72,6 +72,12 @@ int retval; switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + if (pdev->device == 0x27cc) { + ehci->broken_periodic = 1; + ehci_info(ehci, "using broken periodic workaround\n"); + } + break; case PCI_VENDOR_ID_TOSHIBA_2: /* celleb's companion chip */ if (pdev->device == 0x01b5) { --- linux-ec2-2.6.31.orig/drivers/usb/host/sl811-hcd.c +++ linux-ec2-2.6.31/drivers/usb/host/sl811-hcd.c @@ -719,8 +719,12 @@ /* port status seems weird until after reset, so * force the reset and make khubd clean up later. */ - sl811->port1 |= (1 << USB_PORT_FEAT_C_CONNECTION) - | (1 << USB_PORT_FEAT_CONNECTION); + if (sl811->stat_insrmv & 1) + sl811->port1 |= 1 << USB_PORT_FEAT_CONNECTION; + else + sl811->port1 &= ~(1 << USB_PORT_FEAT_CONNECTION); + + sl811->port1 |= 1 << USB_PORT_FEAT_C_CONNECTION; } else if (irqstat & SL11H_INTMASK_RD) { if (sl811->port1 & (1 << USB_PORT_FEAT_SUSPEND)) { --- linux-ec2-2.6.31.orig/drivers/usb/host/ehci-q.c +++ linux-ec2-2.6.31/drivers/usb/host/ehci-q.c @@ -475,8 +475,20 @@ * we must clear the TT buffer (11.17.5). */ if (unlikely(last_status != -EINPROGRESS && - last_status != -EREMOTEIO)) - ehci_clear_tt_buffer(ehci, qh, urb, token); + last_status != -EREMOTEIO)) { + /* The TT's in some hubs malfunction when they + * receive this request following a STALL (they + * stop sending isochronous packets). Since a + * STALL can't leave the TT buffer in a busy + * state (if you believe Figures 11-48 - 11-51 + * in the USB 2.0 spec), we won't clear the TT + * buffer in this case. Strictly speaking this + * is a violation of the spec. + */ + if (last_status != -EPIPE) + ehci_clear_tt_buffer(ehci, qh, urb, + token); + } } /* if we're removing something not at the queue head, --- linux-ec2-2.6.31.orig/drivers/usb/host/xhci.h +++ linux-ec2-2.6.31/drivers/usb/host/xhci.h @@ -581,6 +581,7 @@ /* bit 15 is Linear Stream Array */ /* Interval - period between requests to an endpoint - 125u increments. 
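 * As the decode macro added below suggests, the field is an
 * exponent rather than a count: a stored value of n means a period
 * of 2^n * 125us.  A minimal worked example (values hypothetical):
 *
 *	ep_info = EP_INTERVAL(3);
 *	EP_INTERVAL_TO_UFRAMES(ep_info);	returns 1 << 3 == 8,
 *
 * i.e. one request every 8 microframes (1 ms).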
*/ #define EP_INTERVAL(p) ((p & 0xff) << 16) +#define EP_INTERVAL_TO_UFRAMES(p) (1 << (((p) >> 16) & 0xff)) /* ep_info2 bitmasks */ /* @@ -589,6 +590,7 @@ */ #define FORCE_EVENT (0x1) #define ERROR_COUNT(p) (((p) & 0x3) << 1) +#define CTX_TO_EP_TYPE(p) (((p) >> 3) & 0x7) #define EP_TYPE(p) ((p) << 3) #define ISOC_OUT_EP 1 #define BULK_OUT_EP 2 @@ -601,6 +603,8 @@ /* bit 7 is Host Initiate Disable - for disabling stream selection */ #define MAX_BURST(p) (((p)&0xff) << 8) #define MAX_PACKET(p) (((p)&0xffff) << 16) +#define MAX_PACKET_MASK (0xffff << 16) +#define MAX_PACKET_DECODED(p) (((p) >> 16) & 0xffff) /** @@ -926,6 +930,12 @@ union xhci_trb *last_trb; }; +struct xhci_dequeue_state { + struct xhci_segment *new_deq_seg; + union xhci_trb *new_deq_ptr; + int new_cycle_state; +}; + struct xhci_ring { struct xhci_segment *first_seg; union xhci_trb *enqueue; @@ -952,12 +962,6 @@ u32 cycle_state; }; -struct xhci_dequeue_state { - struct xhci_segment *new_deq_seg; - union xhci_trb *new_deq_ptr; - int new_cycle_state; -}; - struct xhci_erst_entry { /* 64-bit event ring segment address */ u64 seg_addr; @@ -1058,6 +1062,9 @@ int noops_submitted; int noops_handled; int error_bitmask; + unsigned int quirks; +#define XHCI_LINK_TRB_QUIRK (1 << 0) +#define XHCI_RESET_EP_QUIRK (1 << 1) }; /* For testing purposes */ @@ -1136,6 +1143,13 @@ writel(val_hi, ptr + 1); } +static inline int xhci_link_trb_quirk(struct xhci_hcd *xhci) +{ + u32 temp = xhci_readl(xhci, &xhci->cap_regs->hc_capbase); + return ((HC_VERSION(temp) == 0x95) && + (xhci->quirks & XHCI_LINK_TRB_QUIRK)); +} + /* xHCI debugging */ void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num); void xhci_print_registers(struct xhci_hcd *xhci); @@ -1158,7 +1172,12 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *udev); unsigned int xhci_get_endpoint_index(struct usb_endpoint_descriptor *desc); unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc); +unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index); +unsigned int xhci_last_valid_endpoint(u32 added_ctxs); void xhci_endpoint_zero(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_host_endpoint *ep); +void xhci_endpoint_copy(struct xhci_hcd *xhci, + struct xhci_virt_device *vdev, unsigned int ep_index); +void xhci_slot_copy(struct xhci_hcd *xhci, struct xhci_virt_device *vdev); int xhci_endpoint_init(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_device *udev, struct usb_host_endpoint *ep, gfp_t mem_flags); @@ -1205,8 +1224,12 @@ int slot_id, unsigned int ep_index); int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb, int slot_id, unsigned int ep_index); +int xhci_queue_intr_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb, + int slot_id, unsigned int ep_index); int xhci_queue_configure_endpoint(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr, u32 slot_id); +int xhci_queue_evaluate_context(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr, + u32 slot_id); int xhci_queue_reset_ep(struct xhci_hcd *xhci, int slot_id, unsigned int ep_index); void xhci_find_new_dequeue_state(struct xhci_hcd *xhci, @@ -1215,6 +1238,12 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, unsigned int slot_id, unsigned int ep_index, struct xhci_dequeue_state *deq_state); +void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, + struct usb_device *udev, + unsigned int ep_index, struct xhci_ring *ep_ring); +void 
xhci_queue_config_ep_quirk(struct xhci_hcd *xhci, + unsigned int slot_id, unsigned int ep_index, + struct xhci_dequeue_state *deq_state); /* xHCI roothub code */ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, --- linux-ec2-2.6.31.orig/drivers/usb/host/ehci.h +++ linux-ec2-2.6.31/drivers/usb/host/ehci.h @@ -118,6 +118,7 @@ unsigned stamp; unsigned random_frame; unsigned long next_statechange; + ktime_t last_periodic_enable; u32 command; /* SILICON QUIRKS */ @@ -126,6 +127,7 @@ unsigned big_endian_mmio:1; unsigned big_endian_desc:1; unsigned has_amcc_usb23:1; + unsigned broken_periodic:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) --- linux-ec2-2.6.31.orig/drivers/usb/host/ohci.h +++ linux-ec2-2.6.31/drivers/usb/host/ohci.h @@ -402,6 +402,7 @@ #define OHCI_QUIRK_FRAME_NO 0x80 /* no big endian frame_no shift */ #define OHCI_QUIRK_HUB_POWER 0x100 /* distrust firmware power/oc setup */ #define OHCI_QUIRK_AMD_ISO 0x200 /* ISO transfers*/ +#define OHCI_QUIRK_AMD_PREFETCH 0x400 /* pre-fetch for ISO transfer */ // there are also chip quirks/bugs in init logic struct work_struct nec_work; /* Worker for NEC quirk */ @@ -433,6 +434,10 @@ { return ohci->flags & OHCI_QUIRK_AMD_ISO; } +static inline int quirk_amdprefetch(struct ohci_hcd *ohci) +{ + return ohci->flags & OHCI_QUIRK_AMD_PREFETCH; +} #else static inline int quirk_nec(struct ohci_hcd *ohci) { @@ -446,6 +451,10 @@ { return 0; } +static inline int quirk_amdprefetch(struct ohci_hcd *ohci) +{ + return 0; +} #endif /* convert between an hcd pointer and the corresponding ohci_hcd */ --- linux-ec2-2.6.31.orig/drivers/usb/host/xhci-pci.c +++ linux-ec2-2.6.31/drivers/usb/host/xhci-pci.c @@ -24,6 +24,10 @@ #include "xhci.h" +/* Device for a quirk */ +#define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 +#define PCI_DEVICE_ID_FRESCO_LOGIC_PDK 0x1000 + static const char hcd_name[] = "xhci_hcd"; /* called after powerup, by probe or system-pm "wakeup" */ @@ -62,6 +66,15 @@ xhci->hcc_params = xhci_readl(xhci, &xhci->cap_regs->hcc_params); xhci_print_registers(xhci); + /* Look for vendor-specific quirks */ + if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && + pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK && + pdev->revision == 0x0) { + xhci->quirks |= XHCI_RESET_EP_QUIRK; + xhci_dbg(xhci, "QUIRK: Fresco Logic xHC needs configure" + " endpoint cmd after reset endpoint\n"); + } + /* Make sure the HC is halted. */ retval = xhci_halt(xhci); if (retval) --- linux-ec2-2.6.31.orig/drivers/usb/host/xhci-ring.c +++ linux-ec2-2.6.31/drivers/usb/host/xhci-ring.c @@ -172,8 +172,9 @@ * have their chain bit cleared (so that each Link TRB is a separate TD). * * Section 6.4.4.1 of the 0.95 spec says link TRBs cannot have the chain bit - * set, but other sections talk about dealing with the chain bit set. - * Assume section 6.4.4.1 is wrong, and the chain bit can be set in a Link TRB. + * set, but other sections talk about dealing with the chain bit set. This was + * fixed in the 0.96 specification errata, but we have to assume that all 0.95 + * xHCI hardware can't handle the chain bit being cleared on a link TRB. */ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer) { @@ -191,8 +192,14 @@ while (last_trb(xhci, ring, ring->enq_seg, next)) { if (!consumer) { if (ring != xhci->event_ring) { - next->link.control &= ~TRB_CHAIN; - next->link.control |= chain; + /* If we're not dealing with 0.95 hardware, + * carry over the chain bit of the previous TRB + * (which may mean the chain bit is cleared). 
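+				 * A sketch of the two cases (restating
+				 * the code below, not adding logic):
+				 *
+				 *	0.96 HW: link TRB chain bit :=
+				 *		chain bit of the TD being
+				 *		queued (set or cleared);
+				 *	0.95 HW: leave it alone - it was
+				 *		set in xhci-mem.c when the
+				 *		segments were linked and
+				 *		must stay set.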
+ */ + if (!xhci_link_trb_quirk(xhci)) { + next->link.control &= ~TRB_CHAIN; + next->link.control |= chain; + } /* Give this link TRB to the hardware */ wmb(); if (next->link.control & TRB_CYCLE) @@ -462,7 +469,6 @@ * ring running. */ ep_ring->state |= SET_DEQ_PENDING; - xhci_ring_cmd_db(xhci); } /* @@ -531,6 +537,7 @@ if (deq_state.new_deq_ptr && deq_state.new_deq_seg) { xhci_queue_new_dequeue_state(xhci, ep_ring, slot_id, ep_index, &deq_state); + xhci_ring_cmd_db(xhci); } else { /* Otherwise just ring the doorbell to restart the ring */ ring_ep_doorbell(xhci, slot_id, ep_index); @@ -644,18 +651,31 @@ { int slot_id; unsigned int ep_index; + struct xhci_ring *ep_ring; slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]); ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]); + ep_ring = xhci->devs[slot_id]->ep_rings[ep_index]; /* This command will only fail if the endpoint wasn't halted, * but we don't care. */ xhci_dbg(xhci, "Ignoring reset ep completion code of %u\n", (unsigned int) GET_COMP_CODE(event->status)); - /* Clear our internal halted state and restart the ring */ - xhci->devs[slot_id]->ep_rings[ep_index]->state &= ~EP_HALTED; - ring_ep_doorbell(xhci, slot_id, ep_index); + /* HW with the reset endpoint quirk needs to have a configure endpoint + * command complete before the endpoint can be used. Queue that here + * because the HW can't handle two commands being queued in a row. + */ + if (xhci->quirks & XHCI_RESET_EP_QUIRK) { + xhci_dbg(xhci, "Queueing configure endpoint command\n"); + xhci_queue_configure_endpoint(xhci, + xhci->devs[slot_id]->in_ctx->dma, slot_id); + xhci_ring_cmd_db(xhci); + } else { + /* Clear our internal halted state and restart the ring */ + ep_ring->state &= ~EP_HALTED; + ring_ep_doorbell(xhci, slot_id, ep_index); + } } static void handle_cmd_completion(struct xhci_hcd *xhci, @@ -664,6 +684,10 @@ int slot_id = TRB_TO_SLOT_ID(event->flags); u64 cmd_dma; dma_addr_t cmd_dequeue_dma; + struct xhci_input_control_ctx *ctrl_ctx; + unsigned int ep_index; + struct xhci_ring *ep_ring; + unsigned int ep_state; cmd_dma = event->cmd_trb; cmd_dequeue_dma = xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg, @@ -691,6 +715,41 @@ xhci_free_virt_device(xhci, slot_id); break; case TRB_TYPE(TRB_CONFIG_EP): + /* + * Configure endpoint commands can come from the USB core + * configuration or alt setting changes, or because the HW + * needed an extra configure endpoint command after a reset + * endpoint command. In the latter case, the xHCI driver is + * not waiting on the configure endpoint command. 
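+		 * Roughly, the code below distinguishes the cases as
+		 * follows (a sketch, not additional logic):
+		 *
+		 *	!ep_ring            - initial configure endpoint;
+		 *			      just complete() the waiter;
+		 *	quirk && EP_HALTED  - follow-up for the reset
+		 *			      endpoint quirk; clear the
+		 *			      halted state and ring the
+		 *			      doorbell;
+		 *	otherwise           - a caller is waiting, so
+		 *			      complete() as usual.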
+		 */
+		ctrl_ctx = xhci_get_input_control_ctx(xhci,
+				xhci->devs[slot_id]->in_ctx);
+		/* Input ctx add_flags are the endpoint index plus one */
+		ep_index = xhci_last_valid_endpoint(ctrl_ctx->add_flags) - 1;
+		ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
+		if (!ep_ring) {
+			/* This must have been an initial configure endpoint */
+			xhci->devs[slot_id]->cmd_status =
+				GET_COMP_CODE(event->status);
+			complete(&xhci->devs[slot_id]->cmd_completion);
+			break;
+		}
+		ep_state = ep_ring->state;
+		xhci_dbg(xhci, "Completed config ep cmd - last ep index = %d, "
+				"state = %d\n", ep_index, ep_state);
+		if (xhci->quirks & XHCI_RESET_EP_QUIRK &&
+				ep_state & EP_HALTED) {
+			/* Clear our internal halted state and restart ring */
+			xhci->devs[slot_id]->ep_rings[ep_index]->state &=
+				~EP_HALTED;
+			ring_ep_doorbell(xhci, slot_id, ep_index);
+		} else {
+			xhci->devs[slot_id]->cmd_status =
+				GET_COMP_CODE(event->status);
+			complete(&xhci->devs[slot_id]->cmd_completion);
+		}
+		break;
+	case TRB_TYPE(TRB_EVAL_CONTEXT):
 		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
 		complete(&xhci->devs[slot_id]->cmd_completion);
 		break;
@@ -763,9 +822,11 @@
 	cur_seg = start_seg;
 	do {
+		if (start_dma == 0)
+			return 0;
 		/* We may get an event for a Link TRB in the middle of a TD */
 		end_seg_dma = xhci_trb_virt_to_dma(cur_seg,
-				&start_seg->trbs[TRBS_PER_SEGMENT - 1]);
+				&cur_seg->trbs[TRBS_PER_SEGMENT - 1]);
 		/* If the end TRB isn't in this segment, this is set to 0 */
 		end_trb_dma = xhci_trb_virt_to_dma(cur_seg, end_trb);
@@ -792,8 +853,9 @@
 		}
 		cur_seg = cur_seg->next;
 		start_dma = xhci_trb_virt_to_dma(cur_seg, &cur_seg->trbs[0]);
-	} while (1);
+	} while (cur_seg != start_seg);
+	return 0;
 }
 /*
@@ -806,6 +868,7 @@
 {
 	struct xhci_virt_device *xdev;
 	struct xhci_ring *ep_ring;
+	unsigned int slot_id;
 	int ep_index;
 	struct xhci_td *td = 0;
 	dma_addr_t event_dma;
@@ -814,9 +877,11 @@
 	struct urb *urb = 0;
 	int status = -EINPROGRESS;
 	struct xhci_ep_ctx *ep_ctx;
+	u32 trb_comp_code;
 	xhci_dbg(xhci, "In %s\n", __func__);
-	xdev = xhci->devs[TRB_TO_SLOT_ID(event->flags)];
+	slot_id = TRB_TO_SLOT_ID(event->flags);
+	xdev = xhci->devs[slot_id];
 	if (!xdev) {
 		xhci_err(xhci, "ERROR Transfer event pointed to bad slot\n");
 		return -ENODEV;
@@ -870,7 +935,8 @@
 			(unsigned int) event->flags);
 	/* Look for common error cases */
-	switch (GET_COMP_CODE(event->transfer_len)) {
+	trb_comp_code = GET_COMP_CODE(event->transfer_len);
+	switch (trb_comp_code) {
 	/* Skip codes that require special handling depending on
 	 * transfer type
 	 */
@@ -913,7 +979,7 @@
 	/* Was this a control transfer? */
 	if (usb_endpoint_xfer_control(&td->urb->ep->desc)) {
 		xhci_debug_trb(xhci, xhci->event_ring->dequeue);
-		switch (GET_COMP_CODE(event->transfer_len)) {
+		switch (trb_comp_code) {
 		case COMP_SUCCESS:
 			if (event_trb == ep_ring->dequeue) {
 				xhci_warn(xhci, "WARN: Success on ctrl setup TRB without IOC set??\n");
@@ -928,8 +994,39 @@
 			break;
 		case COMP_SHORT_TX:
 			xhci_warn(xhci, "WARN: short transfer on control ep\n");
-			status = -EREMOTEIO;
+			if (td->urb->transfer_flags & URB_SHORT_NOT_OK)
+				status = -EREMOTEIO;
+			else
+				status = 0;
 			break;
+		case COMP_BABBLE:
+			/* The 0.95 spec says a babbling control endpoint
+			 * is not halted. The 0.96 spec says it is.  Some HW
+			 * claims to be 0.95 compliant, but it halts the control
+			 * endpoint anyway.  Check if a babble halted the
+			 * endpoint.
+			 */
+			if (ep_ctx->ep_info != EP_STATE_HALTED)
+				break;
+			/* else fall through */
+		case COMP_STALL:
+			/* Did we transfer part of the data (middle) phase?
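+			 * A worked example with hypothetical numbers: for
+			 * an 18-byte data stage that stalls after 10 bytes,
+			 * the event's TRB_LEN holds the 8 bytes left, so
+			 * actual_length below becomes 18 - 8 = 10; a stall
+			 * on the setup or status TRB instead leaves
+			 * actual_length at 0.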
*/ + if (event_trb != ep_ring->dequeue && + event_trb != td->last_trb) + td->urb->actual_length = + td->urb->transfer_buffer_length + - TRB_LEN(event->transfer_len); + else + td->urb->actual_length = 0; + + ep_ring->stopped_td = td; + ep_ring->stopped_trb = event_trb; + xhci_queue_reset_ep(xhci, slot_id, ep_index); + xhci_cleanup_stalled_ring(xhci, + td->urb->dev, + ep_index, ep_ring); + xhci_ring_cmd_db(xhci); + goto td_cleanup; default: /* Others already handled above */ break; @@ -943,7 +1040,10 @@ if (event_trb == td->last_trb) { if (td->urb->actual_length != 0) { /* Don't overwrite a previously set error code */ - if (status == -EINPROGRESS || status == 0) + if ((status == -EINPROGRESS || + status == 0) && + (td->urb->transfer_flags + & URB_SHORT_NOT_OK)) /* Did we already see a short data stage? */ status = -EREMOTEIO; } else { @@ -952,7 +1052,7 @@ } } else { /* Maybe the event was for the data stage? */ - if (GET_COMP_CODE(event->transfer_len) != COMP_STOP_INVAL) { + if (trb_comp_code != COMP_STOP_INVAL) { /* We didn't stop on a link TRB in the middle */ td->urb->actual_length = td->urb->transfer_buffer_length - @@ -964,7 +1064,7 @@ } } } else { - switch (GET_COMP_CODE(event->transfer_len)) { + switch (trb_comp_code) { case COMP_SUCCESS: /* Double check that the HW transferred everything. */ if (event_trb != td->last_trb) { @@ -975,7 +1075,12 @@ else status = 0; } else { - xhci_dbg(xhci, "Successful bulk transfer!\n"); + if (usb_endpoint_xfer_bulk(&td->urb->ep->desc)) + xhci_dbg(xhci, "Successful bulk " + "transfer!\n"); + else + xhci_dbg(xhci, "Successful interrupt " + "transfer!\n"); status = 0; } break; @@ -1001,11 +1106,17 @@ td->urb->actual_length = td->urb->transfer_buffer_length - TRB_LEN(event->transfer_len); - if (td->urb->actual_length < 0) { + if (td->urb->transfer_buffer_length < + td->urb->actual_length) { xhci_warn(xhci, "HC gave bad length " "of %d bytes left\n", TRB_LEN(event->transfer_len)); td->urb->actual_length = 0; + if (td->urb->transfer_flags & + URB_SHORT_NOT_OK) + status = -EREMOTEIO; + else + status = 0; } /* Don't overwrite a previously set error code */ if (status == -EINPROGRESS) { @@ -1041,14 +1152,14 @@ /* If the ring didn't stop on a Link or No-op TRB, add * in the actual bytes transferred from the Normal TRB */ - if (GET_COMP_CODE(event->transfer_len) != COMP_STOP_INVAL) + if (trb_comp_code != COMP_STOP_INVAL) td->urb->actual_length += TRB_LEN(cur_trb->generic.field[2]) - TRB_LEN(event->transfer_len); } } - if (GET_COMP_CODE(event->transfer_len) == COMP_STOP_INVAL || - GET_COMP_CODE(event->transfer_len) == COMP_STOP) { + if (trb_comp_code == COMP_STOP_INVAL || + trb_comp_code == COMP_STOP) { /* The Endpoint Stop Command completion will take care of any * stopped TDs. A stopped TD may be restarted, so don't update * the ring dequeue pointer or take this TD off any lists yet. @@ -1056,7 +1167,8 @@ ep_ring->stopped_td = td; ep_ring->stopped_trb = event_trb; } else { - if (GET_COMP_CODE(event->transfer_len) == COMP_STALL) { + if (trb_comp_code == COMP_STALL || + trb_comp_code == COMP_BABBLE) { /* The transfer is completed from the driver's * perspective, but we need to issue a set dequeue * command for this stalled endpoint to move the dequeue @@ -1072,16 +1184,41 @@ inc_deq(xhci, ep_ring, false); } +td_cleanup: /* Clean up the endpoint's TD list */ urb = td->urb; + /* Do one last check of the actual transfer length. 
+	 * If the host controller said we transferred more data than
+	 * the buffer length, urb->actual_length will be a very big
+	 * number (since it's unsigned).  Play it safe and say we didn't
+	 * transfer anything.
+	 */
+	if (urb->actual_length > urb->transfer_buffer_length) {
+		xhci_warn(xhci, "URB transfer length is wrong, "
+				"xHC issue? req. len = %u, "
+				"act. len = %u\n",
+				urb->transfer_buffer_length,
+				urb->actual_length);
+		urb->actual_length = 0;
+		if (td->urb->transfer_flags & URB_SHORT_NOT_OK)
+			status = -EREMOTEIO;
+		else
+			status = 0;
+	}
 	list_del(&td->td_list);
 	/* Was this TD slated to be cancelled but completed anyway? */
 	if (!list_empty(&td->cancelled_td_list)) {
 		list_del(&td->cancelled_td_list);
 		ep_ring->cancels_pending--;
 	}
-	/* Leave the TD around for the reset endpoint function to use */
-	if (GET_COMP_CODE(event->transfer_len) != COMP_STALL) {
+	/* Leave the TD around for the reset endpoint function to use
+	 * (but only if it's not a control endpoint, since we already
+	 * queued the Set TR dequeue pointer command for stalled
+	 * control endpoints).
+	 */
+	if (usb_endpoint_xfer_control(&urb->ep->desc) ||
+		(trb_comp_code != COMP_STALL &&
+		trb_comp_code != COMP_BABBLE)) {
 		kfree(td);
 	}
 	urb->hcpriv = NULL;
@@ -1094,7 +1231,7 @@
 	if (urb) {
 		usb_hcd_unlink_urb_from_ep(xhci_to_hcd(xhci), urb);
 		xhci_dbg(xhci, "Giveback URB %p, len = %d, status = %d\n",
-				urb, td->urb->actual_length, status);
+				urb, urb->actual_length, status);
 		spin_unlock(&xhci->lock);
 		usb_hcd_giveback_urb(xhci_to_hcd(xhci), urb, status);
 		spin_lock(&xhci->lock);
@@ -1335,6 +1472,47 @@
 	ring_ep_doorbell(xhci, slot_id, ep_index);
 }
+/*
+ * xHCI uses normal TRBs for both bulk and interrupt.  When the interrupt
+ * endpoint is to be serviced, the xHC will consume (at most) one TD.  A TD
+ * (comprised of sg list entries) can take several service intervals to
+ * transmit.
+ */
+int xhci_queue_intr_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
+		struct urb *urb, int slot_id, unsigned int ep_index)
+{
+	struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci,
+			xhci->devs[slot_id]->out_ctx, ep_index);
+	int xhci_interval;
+	int ep_interval;
+
+	xhci_interval = EP_INTERVAL_TO_UFRAMES(ep_ctx->ep_info);
+	ep_interval = urb->interval;
+	/* Convert to microframes */
+	if (urb->dev->speed == USB_SPEED_LOW ||
+			urb->dev->speed == USB_SPEED_FULL)
+		ep_interval *= 8;
+	/* FIXME change this to a warning and a suggestion to use the new API
+	 * to set the polling interval (once the API is added).
+	 */
+	if (xhci_interval != ep_interval) {
+		if (printk_ratelimit())
+			dev_dbg(&urb->dev->dev, "Driver uses different interval"
+					" (%d microframe%s) than xHCI "
+					"(%d microframe%s)\n",
+					ep_interval,
+					ep_interval == 1 ? "" : "s",
+					xhci_interval,
+					xhci_interval == 1 ?
"" : "s"); + urb->interval = xhci_interval; + /* Convert back to frames for LS/FS devices */ + if (urb->dev->speed == USB_SPEED_LOW || + urb->dev->speed == USB_SPEED_FULL) + urb->interval /= 8; + } + return xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); +} + static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb, int slot_id, unsigned int ep_index) { @@ -1733,6 +1911,15 @@ TRB_TYPE(TRB_CONFIG_EP) | SLOT_ID_FOR_TRB(slot_id)); } +/* Queue an evaluate context command TRB */ +int xhci_queue_evaluate_context(struct xhci_hcd *xhci, dma_addr_t in_ctx_ptr, + u32 slot_id) +{ + return queue_command(xhci, lower_32_bits(in_ctx_ptr), + upper_32_bits(in_ctx_ptr), 0, + TRB_TYPE(TRB_EVAL_CONTEXT) | SLOT_ID_FOR_TRB(slot_id)); +} + int xhci_queue_stop_endpoint(struct xhci_hcd *xhci, int slot_id, unsigned int ep_index) { --- linux-ec2-2.6.31.orig/drivers/usb/host/xhci-dbg.c +++ linux-ec2-2.6.31/drivers/usb/host/xhci-dbg.c @@ -413,7 +413,8 @@ int i; struct xhci_slot_ctx *slot_ctx = xhci_get_slot_ctx(xhci, ctx); - dma_addr_t dma = ctx->dma + ((unsigned long)slot_ctx - (unsigned long)ctx); + dma_addr_t dma = ctx->dma + + ((unsigned long)slot_ctx - (unsigned long)ctx->bytes); int csz = HCC_64BYTE_CONTEXT(xhci->hcc_params); xhci_dbg(xhci, "Slot Context:\n"); @@ -459,7 +460,7 @@ for (i = 0; i < last_ep_ctx; ++i) { struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, ctx, i); dma_addr_t dma = ctx->dma + - ((unsigned long)ep_ctx - (unsigned long)ctx); + ((unsigned long)ep_ctx - (unsigned long)ctx->bytes); xhci_dbg(xhci, "Endpoint %02d Context:\n", i); xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - ep_info\n", --- linux-ec2-2.6.31.orig/drivers/usb/host/ehci-hcd.c +++ linux-ec2-2.6.31/drivers/usb/host/ehci-hcd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -655,6 +656,7 @@ ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); up_write(&ehci_cf_port_reset_rwsem); + ehci->last_periodic_enable = ktime_get_real(); temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci_info (ehci, --- linux-ec2-2.6.31.orig/drivers/usb/serial/option.c +++ linux-ec2-2.6.31/drivers/usb/serial/option.c @@ -166,6 +166,7 @@ #define HUAWEI_PRODUCT_E143D 0x143D #define HUAWEI_PRODUCT_E143E 0x143E #define HUAWEI_PRODUCT_E143F 0x143F +#define HUAWEI_PRODUCT_E14AC 0x14AC #define QUANTA_VENDOR_ID 0x0408 #define QUANTA_PRODUCT_Q101 0xEA02 @@ -225,6 +226,7 @@ #define AMOI_VENDOR_ID 0x1614 #define AMOI_PRODUCT_H01 0x0800 #define AMOI_PRODUCT_H01A 0x7002 +#define AMOI_PRODUCT_9508 0x0800 #define AMOI_PRODUCT_H02 0x0802 #define DELL_VENDOR_ID 0x413C @@ -283,15 +285,13 @@ #define BANDRICH_PRODUCT_1011 0x1011 #define BANDRICH_PRODUCT_1012 0x1012 -#define AMOI_VENDOR_ID 0x1614 -#define AMOI_PRODUCT_9508 0x0800 - #define QUALCOMM_VENDOR_ID 0x05C6 #define MAXON_VENDOR_ID 0x16d8 #define TELIT_VENDOR_ID 0x1bc7 #define TELIT_PRODUCT_UC864E 0x1003 +#define TELIT_PRODUCT_UC864G 0x1004 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 @@ -300,12 +300,14 @@ #define ZTE_PRODUCT_MF626 0x0031 #define ZTE_PRODUCT_CDMA_TECH 0xfffe #define ZTE_PRODUCT_AC8710 0xfff1 +#define ZTE_PRODUCT_AC2726 0xfff5 #define BENQ_VENDOR_ID 0x04a5 #define BENQ_PRODUCT_H10 0x4068 #define DLINK_VENDOR_ID 0x1186 #define DLINK_PRODUCT_DWM_652 0x3e04 +#define DLINK_PRODUCT_DWM_652_U5 0xce16 #define QISDA_VENDOR_ID 0x1da5 #define QISDA_PRODUCT_H21_4512 0x4512 @@ -313,10 +315,14 @@ #define QISDA_PRODUCT_H20_4515 0x4515 #define QISDA_PRODUCT_H20_4519 0x4519 +/* TLAYTECH PRODUCTS */ 
+#define TLAYTECH_VENDOR_ID 0x20B9 +#define TLAYTECH_PRODUCT_TEU800 0x1682 /* TOSHIBA PRODUCTS */ #define TOSHIBA_VENDOR_ID 0x0930 #define TOSHIBA_PRODUCT_HSDPA_MINICARD 0x1302 +#define TOSHIBA_PRODUCT_G450 0x0d45 #define ALINK_VENDOR_ID 0x1e0e #define ALINK_PRODUCT_3GU 0x9200 @@ -325,6 +331,13 @@ #define ALCATEL_VENDOR_ID 0x1bbb #define ALCATEL_PRODUCT_X060S 0x0000 +/* Airplus products */ +#define AIRPLUS_VENDOR_ID 0x1011 +#define AIRPLUS_PRODUCT_MCD650 0x3198 + +/* Haier products */ +#define HAIER_VENDOR_ID 0x201e +#define HAIER_PRODUCT_CE100 0x2009 static struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -423,6 +436,7 @@ { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143D, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143E, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143F, 0xff, 0xff, 0xff) }, + { USB_DEVICE(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_9508) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, /* Novatel Merlin V640/XV620 */ { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, /* Novatel Merlin V620/S620 */ @@ -503,6 +517,7 @@ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(MAXON_VENDOR_ID, 0x6280) }, /* BP3-USB & BP3-EXT HSDPA */ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864E) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864G) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0003, 0xff, 0xff, 0xff) }, @@ -564,24 +579,67 @@ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0086, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2002, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2003, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0104, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0106, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0108, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0113, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0117, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0118, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0121, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0122, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0123, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0124, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0125, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0126, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0128, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0142, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0143, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0144, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0145, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0146, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0147, 0xff, 0xff, 0xff) }, + { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0148, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0149, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0150, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0151, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0152, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0153, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0154, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0155, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0156, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0157, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0158, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0159, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0160, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0161, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0162, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0014, 0xff, 0xff, 0xff) }, /* ZTE CDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0027, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0059, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0060, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0070, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0073, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0130, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0141, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_CDMA_TECH, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC8710, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) }, { USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_H10) }, { USB_DEVICE(DLINK_VENDOR_ID, DLINK_PRODUCT_DWM_652) }, + { USB_DEVICE(ALINK_VENDOR_ID, DLINK_PRODUCT_DWM_652_U5) }, /* Yes, ALINK_VENDOR_ID */ { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H21_4512) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H21_4523) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H20_4515) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H20_4519) }, + { USB_DEVICE(TOSHIBA_VENDOR_ID, TOSHIBA_PRODUCT_G450) }, { USB_DEVICE(TOSHIBA_VENDOR_ID, TOSHIBA_PRODUCT_HSDPA_MINICARD ) }, /* Toshiba 3G HSDPA == Novatel Expedite EU870D MiniCard */ { USB_DEVICE(ALINK_VENDOR_ID, 0x9000) }, + { USB_DEVICE(ALINK_VENDOR_ID, 0xce16) }, { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S) }, + { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, + { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) }, + { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); --- linux-ec2-2.6.31.orig/drivers/usb/serial/iuu_phoenix.c +++ linux-ec2-2.6.31/drivers/usb/serial/iuu_phoenix.c @@ -71,7 +71,6 @@ spinlock_t lock; /* store irq state */ wait_queue_head_t delta_msr_wait; u8 line_status; - u8 termios_initialized; int tiostatus; /* store IUART SIGNAL for tiocmget call */ u8 reset; /* if 1 reset is needed */ int poll; /* number of poll */ @@ -1018,6 +1017,18 @@ 
} } +static void iuu_init_termios(struct tty_struct *tty) +{ + *(tty->termios) = tty_std_termios; + tty->termios->c_cflag = CLOCAL | CREAD | CS8 | B9600 + | TIOCM_CTS | CSTOPB | PARENB; + tty->termios->c_ispeed = 9600; + tty->termios->c_ospeed = 9600; + tty->termios->c_lflag = 0; + tty->termios->c_oflag = 0; + tty->termios->c_iflag = 0; +} + static int iuu_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) { @@ -1025,7 +1036,6 @@ u8 *buf; int result; u32 actual; - unsigned long flags; struct iuu_private *priv = usb_get_serial_port_data(port); dbg("%s - port %d", __func__, port->number); @@ -1064,21 +1074,7 @@ port->bulk_in_buffer, 512, NULL, NULL); - /* set the termios structure */ - spin_lock_irqsave(&priv->lock, flags); - if (tty && !priv->termios_initialized) { - *(tty->termios) = tty_std_termios; - tty->termios->c_cflag = CLOCAL | CREAD | CS8 | B9600 - | TIOCM_CTS | CSTOPB | PARENB; - tty->termios->c_ispeed = 9600; - tty->termios->c_ospeed = 9600; - tty->termios->c_lflag = 0; - tty->termios->c_oflag = 0; - tty->termios->c_iflag = 0; - priv->termios_initialized = 1; - priv->poll = 0; - } - spin_unlock_irqrestore(&priv->lock, flags); + priv->poll = 0; /* initialize writebuf */ #define FISH(a, b, c, d) do { \ @@ -1201,6 +1197,7 @@ .tiocmget = iuu_tiocmget, .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, + .init_termios = iuu_init_termios, .attach = iuu_startup, .release = iuu_release, }; --- linux-ec2-2.6.31.orig/drivers/usb/serial/sierra.c +++ linux-ec2-2.6.31/drivers/usb/serial/sierra.c @@ -287,6 +287,8 @@ struct sierra_port_private *portdata; __u16 interface = 0; int val = 0; + int do_send = 0; + int retval; dev_dbg(&port->dev, "%s\n", __func__); @@ -305,10 +307,7 @@ */ if (port->interrupt_in_urb) { /* send control message */ - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, interface, - NULL, 0, USB_CTRL_SET_TIMEOUT); + do_send = 1; } } @@ -320,12 +319,18 @@ interface = 1; else if (port->bulk_out_endpointAddress == 5) interface = 2; - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, interface, - NULL, 0, USB_CTRL_SET_TIMEOUT); + + do_send = 1; } - return 0; + if (!do_send) + return 0; + + usb_autopm_get_interface(serial->interface); + retval = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), + 0x22, 0x21, val, interface, NULL, 0, USB_CTRL_SET_TIMEOUT); + usb_autopm_put_interface(serial->interface); + + return retval; } static void sierra_set_termios(struct tty_struct *tty, --- linux-ec2-2.6.31.orig/drivers/usb/serial/digi_acceleport.c +++ linux-ec2-2.6.31/drivers/usb/serial/digi_acceleport.c @@ -899,16 +899,16 @@ spin_lock_irqsave(&priv->dp_port_lock, flags); - /* turn throttle off */ - priv->dp_throttled = 0; - priv->dp_throttle_restart = 0; - /* restart read chain */ if (priv->dp_throttle_restart) { port->read_urb->dev = port->serial->dev; ret = usb_submit_urb(port->read_urb, GFP_ATOMIC); } + /* turn throttle off */ + priv->dp_throttled = 0; + priv->dp_throttle_restart = 0; + spin_unlock_irqrestore(&priv->dp_port_lock, flags); if (ret) --- linux-ec2-2.6.31.orig/drivers/usb/serial/pl2303.c +++ linux-ec2-2.6.31/drivers/usb/serial/pl2303.c @@ -96,6 +96,7 @@ { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, + { USB_DEVICE(SANWA_VENDOR_ID, SANWA_PRODUCT_ID) }, { } /* Terminating entry */ }; @@ -994,13 +995,15 @@ /* overrun is special, not 
associated with a char */ if (line_status & UART_OVERRUN_ERROR) tty_insert_flip_char(tty, 0, TTY_OVERRUN); - if (port->console && port->sysrq) { + + if (tty_flag == TTY_NORMAL && !(port->console && port->sysrq)) + tty_insert_flip_string(tty, data, urb->actual_length); + else { int i; for (i = 0; i < urb->actual_length; ++i) if (!usb_serial_handle_sysrq_char(tty, port, data[i])) tty_insert_flip_char(tty, data[i], tty_flag); - } else - tty_insert_flip_string(tty, data, urb->actual_length); + } tty_flip_buffer_push(tty); } --- linux-ec2-2.6.31.orig/drivers/usb/serial/usb-serial.c +++ linux-ec2-2.6.31/drivers/usb/serial/usb-serial.c @@ -43,8 +43,6 @@ #define DRIVER_AUTHOR "Greg Kroah-Hartman, greg@kroah.com, http://www.kroah.com/linux/" #define DRIVER_DESC "USB Serial Driver core" -static void port_free(struct usb_serial_port *port); - /* Driver structure we register with the USB core */ static struct usb_driver usb_serial_driver = { .name = "usbserial", @@ -68,6 +66,11 @@ static DEFINE_MUTEX(table_lock); static LIST_HEAD(usb_serial_driver_list); +/* + * Look up the serial structure. If it is found and it hasn't been + * disconnected, return with its disc_mutex held and its refcount + * incremented. Otherwise return NULL. + */ struct usb_serial *usb_serial_get_by_index(unsigned index) { struct usb_serial *serial; @@ -75,8 +78,15 @@ mutex_lock(&table_lock); serial = serial_table[index]; - if (serial) - kref_get(&serial->kref); + if (serial) { + mutex_lock(&serial->disc_mutex); + if (serial->disconnected) { + mutex_unlock(&serial->disc_mutex); + serial = NULL; + } else { + kref_get(&serial->kref); + } + } mutex_unlock(&table_lock); return serial; } @@ -125,8 +135,10 @@ dbg("%s", __func__); + mutex_lock(&table_lock); for (i = 0; i < serial->num_ports; ++i) serial_table[serial->minor + i] = NULL; + mutex_unlock(&table_lock); } static void destroy_serial(struct kref *kref) @@ -143,163 +155,160 @@ if (serial->minor != SERIAL_TTY_NO_MINOR) return_serial(serial); - serial->type->release(serial); + if (serial->attached) + serial->type->release(serial); - for (i = 0; i < serial->num_ports; ++i) { + /* Now that nothing is using the ports, they can be freed */ + for (i = 0; i < serial->num_port_pointers; ++i) { port = serial->port[i]; - if (port) + if (port) { + port->serial = NULL; put_device(&port->dev); - } - - /* If this is a "fake" port, we have to clean it up here, as it will - * not get cleaned up in port_release() as it was never registered with - * the driver core */ - if (serial->num_ports < serial->num_port_pointers) { - for (i = serial->num_ports; - i < serial->num_port_pointers; ++i) { - port = serial->port[i]; - if (port) - port_free(port); } } usb_put_dev(serial->dev); - - /* free up any memory that we allocated */ kfree(serial); } void usb_serial_put(struct usb_serial *serial) { - mutex_lock(&table_lock); kref_put(&serial->kref, destroy_serial); - mutex_unlock(&table_lock); } /***************************************************************************** * Driver tty interface functions *****************************************************************************/ -static int serial_open (struct tty_struct *tty, struct file *filp) + +/** + * serial_install - install tty + * @driver: the driver (USB in our case) + * @tty: the tty being created + * + * Create the termios objects for this tty. We use the default + * USB serial settings but permit them to be overridden by + * serial->type->init_termios. + * + * This is the first place a new tty gets used. 
Hence this is where we + * acquire references to the usb_serial structure and the driver module, + * where we store a pointer to the port, and where we do an autoresume. + * All these actions are reversed in serial_release(). + */ +static int serial_install(struct tty_driver *driver, struct tty_struct *tty) { + int idx = tty->index; struct usb_serial *serial; struct usb_serial_port *port; - unsigned int portNumber; - int retval = 0; - int first = 0; + int retval = -ENODEV; dbg("%s", __func__); - /* get the serial object associated with this tty pointer */ - serial = usb_serial_get_by_index(tty->index); - if (!serial) { - tty->driver_data = NULL; - return -ENODEV; - } + serial = usb_serial_get_by_index(idx); + if (!serial) + return retval; - mutex_lock(&serial->disc_mutex); - portNumber = tty->index - serial->minor; - port = serial->port[portNumber]; - if (!port || serial->disconnected) - retval = -ENODEV; - else - get_device(&port->dev); - /* - * Note: Our locking order requirement does not allow port->mutex - * to be acquired while serial->disc_mutex is held. - */ - mutex_unlock(&serial->disc_mutex); + port = serial->port[idx - serial->minor]; + if (!port) + goto error_no_port; + if (!try_module_get(serial->type->driver.owner)) + goto error_module_get; + + /* perform the standard setup */ + retval = tty_init_termios(tty); if (retval) - goto bailout_serial_put; + goto error_init_termios; - if (mutex_lock_interruptible(&port->mutex)) { - retval = -ERESTARTSYS; - goto bailout_port_put; - } + retval = usb_autopm_get_interface(serial->interface); + if (retval) + goto error_get_interface; + + mutex_unlock(&serial->disc_mutex); - ++port->port.count; + /* allow the driver to update the settings */ + if (serial->type->init_termios) + serial->type->init_termios(tty); - /* set up our port structure making the tty driver - * remember our port object, and us it */ tty->driver_data = port; - tty_port_tty_set(&port->port, tty); - /* If the console is attached, the device is already open */ - if (port->port.count == 1 && !port->console) { - first = 1; - /* lock this module before we call it - * this may fail, which means we must bail out, - * safe because we are called with BKL held */ - if (!try_module_get(serial->type->driver.owner)) { - retval = -ENODEV; - goto bailout_mutex_unlock; - } + /* Final install (we use the default method) */ + tty_driver_kref_get(driver); + tty->count++; + driver->ttys[idx] = tty; + return retval; + error_get_interface: + error_init_termios: + module_put(serial->type->driver.owner); + error_module_get: + error_no_port: + usb_serial_put(serial); + mutex_unlock(&serial->disc_mutex); + return retval; +} + +static int serial_open(struct tty_struct *tty, struct file *filp) +{ + struct usb_serial_port *port = tty->driver_data; + struct usb_serial *serial = port->serial; + int retval; + + dbg("%s - port %d", __func__, port->number); + + spin_lock_irq(&port->port.lock); + if (!tty_hung_up_p(filp)) + ++port->port.count; + spin_unlock_irq(&port->port.lock); + tty_port_tty_set(&port->port, tty); + + /* Do the device-specific open only if the hardware isn't + * already initialized. 
+ */ + if (!test_bit(ASYNCB_INITIALIZED, &port->port.flags)) { + if (mutex_lock_interruptible(&port->mutex)) + return -ERESTARTSYS; mutex_lock(&serial->disc_mutex); if (serial->disconnected) retval = -ENODEV; else - retval = usb_autopm_get_interface(serial->interface); - if (retval) - goto bailout_module_put; - - /* only call the device specific open if this - * is the first time the port is opened */ - retval = serial->type->open(tty, port, filp); - if (retval) - goto bailout_interface_put; + retval = port->serial->type->open(tty, port, filp); mutex_unlock(&serial->disc_mutex); + mutex_unlock(&port->mutex); + if (retval) + return retval; set_bit(ASYNCB_INITIALIZED, &port->port.flags); } - mutex_unlock(&port->mutex); + /* Now do the correct tty layer semantics */ retval = tty_port_block_til_ready(&port->port, tty, filp); - if (retval == 0) { - if (!first) - usb_serial_put(serial); - return 0; - } - mutex_lock(&port->mutex); - if (first == 0) - goto bailout_mutex_unlock; - /* Undo the initial port actions */ - mutex_lock(&serial->disc_mutex); -bailout_interface_put: - usb_autopm_put_interface(serial->interface); -bailout_module_put: - mutex_unlock(&serial->disc_mutex); - module_put(serial->type->driver.owner); -bailout_mutex_unlock: - port->port.count = 0; - tty->driver_data = NULL; - tty_port_tty_set(&port->port, NULL); - mutex_unlock(&port->mutex); -bailout_port_put: - put_device(&port->dev); -bailout_serial_put: - usb_serial_put(serial); return retval; } /** - * serial_do_down - shut down hardware - * @port: port to shut down - * - * Shut down a USB port unless it is the console. We never shut down the - * console hardware as it will always be in use. + * serial_down - shut down hardware + * @port: port to shut down * - * Don't free any resources at this point + * Shut down a USB serial port unless it is the console. We never + * shut down the console hardware as it will always be in use. */ -static void serial_do_down(struct usb_serial_port *port) +static void serial_down(struct usb_serial_port *port) { struct usb_serial_driver *drv = port->serial->type; struct usb_serial *serial; struct module *owner; - /* The console is magical, do not hang up the console hardware - or there will be tears */ + /* + * The console is magical. Do not hang up the console hardware + * or there will be tears. + */ if (port->console) return; + /* Don't call the close method if the hardware hasn't been + * initialized. + */ + if (!test_and_clear_bit(ASYNCB_INITIALIZED, &port->port.flags)) + return; + mutex_lock(&port->mutex); serial = port->serial; owner = serial->type->driver.owner; @@ -310,79 +319,69 @@ mutex_unlock(&port->mutex); } -/** - * serial_do_free - free resources post close/hangup - * @port: port to free up - * - * Do the resource freeing and refcount dropping for the port. We must - * be careful about ordering and we must avoid freeing up the console. 
- */ - -static void serial_do_free(struct usb_serial_port *port) +static void serial_hangup(struct tty_struct *tty) { - struct usb_serial *serial; - struct module *owner; + struct usb_serial_port *port = tty->driver_data; - /* The console is magical, do not hang up the console hardware - or there will be tears */ - if (port->console) - return; + dbg("%s - port %d", __func__, port->number); - serial = port->serial; - owner = serial->type->driver.owner; - put_device(&port->dev); - /* Mustn't dereference port any more */ - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - usb_autopm_put_interface(serial->interface); - mutex_unlock(&serial->disc_mutex); - usb_serial_put(serial); - /* Mustn't dereference serial any more */ - module_put(owner); + serial_down(port); + tty_port_hangup(&port->port); } static void serial_close(struct tty_struct *tty, struct file *filp) { struct usb_serial_port *port = tty->driver_data; - if (!port) - return; - dbg("%s - port %d", __func__, port->number); - /* FIXME: - This leaves a very narrow race. Really we should do the - serial_do_free() on tty->shutdown(), but tty->shutdown can - be called from IRQ context and serial_do_free can sleep. - - The right fix is probably to make the tty free (which is rare) - and thus tty->shutdown() occur via a work queue and simplify all - the drivers that use it. - */ - if (tty_hung_up_p(filp)) { - /* serial_hangup already called serial_down at this point. - Another user may have already reopened the port but - serial_do_free is refcounted */ - serial_do_free(port); + if (tty_hung_up_p(filp)) return; - } - if (tty_port_close_start(&port->port, tty, filp) == 0) return; - - serial_do_down(port); + serial_down(port); tty_port_close_end(&port->port, tty); tty_port_tty_set(&port->port, NULL); - serial_do_free(port); } -static void serial_hangup(struct tty_struct *tty) +/** + * serial_release - free resources post close/hangup + * @port: port to free up + * + * Do the resource freeing and refcount dropping for the port. + * Avoid freeing the console. + * + * Called when the last tty kref is dropped. + */ +static void serial_release(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - serial_do_down(port); - tty_port_hangup(&port->port); - /* We must not free port yet - the USB serial layer depends on it's - continued existence */ + struct usb_serial *serial; + struct module *owner; + + /* The console is magical. Do not hang up the console hardware + * or there will be tears. 
+ */ + if (port->console) + return; + + dbg("%s - port %d", __func__, port->number); + + /* Standard shutdown processing */ + tty_shutdown(tty); + + tty->driver_data = NULL; + + serial = port->serial; + owner = serial->type->driver.owner; + + mutex_lock(&serial->disc_mutex); + if (!serial->disconnected) + usb_autopm_put_interface(serial->interface); + mutex_unlock(&serial->disc_mutex); + + usb_serial_put(serial); + module_put(owner); } static int serial_write(struct tty_struct *tty, const unsigned char *buf, @@ -527,6 +526,7 @@ seq_putc(m, '\n'); usb_serial_put(serial); + mutex_unlock(&serial->disc_mutex); } return 0; } @@ -596,14 +596,6 @@ tty_kref_put(tty); } -static void port_release(struct device *dev) -{ - struct usb_serial_port *port = to_usb_serial_port(dev); - - dbg ("%s - %s", __func__, dev_name(dev)); - port_free(port); -} - static void kill_traffic(struct usb_serial_port *port) { usb_kill_urb(port->read_urb); @@ -623,8 +615,12 @@ usb_kill_urb(port->interrupt_out_urb); } -static void port_free(struct usb_serial_port *port) +static void port_release(struct device *dev) { + struct usb_serial_port *port = to_usb_serial_port(dev); + + dbg ("%s - %s", __func__, dev_name(dev)); + /* * Stop all the traffic before cancelling the work, so that * nobody will restart it by calling usb_serial_port_softint. @@ -935,6 +931,11 @@ mutex_init(&port->mutex); INIT_WORK(&port->work, usb_serial_port_work); serial->port[i] = port; + port->dev.parent = &interface->dev; + port->dev.driver = NULL; + port->dev.bus = &usb_serial_bus_type; + port->dev.release = &port_release; + device_initialize(&port->dev); } /* set up the endpoint information */ @@ -1060,12 +1061,15 @@ module_put(type->driver.owner); if (retval < 0) goto probe_error; + serial->attached = 1; if (retval > 0) { /* quietly accept this device, but don't bind to a serial port as it's about to disappear */ serial->num_ports = 0; goto exit; } + } else { + serial->attached = 1; } if (get_free_serial(serial, num_ports, &minor) == NULL) { @@ -1077,15 +1081,10 @@ /* register all of the individual ports with the driver core */ for (i = 0; i < num_ports; ++i) { port = serial->port[i]; - port->dev.parent = &interface->dev; - port->dev.driver = NULL; - port->dev.bus = &usb_serial_bus_type; - port->dev.release = &port_release; - dev_set_name(&port->dev, "ttyUSB%d", port->number); dbg ("%s - registering %s", __func__, dev_name(&port->dev)); port->dev_state = PORT_REGISTERING; - retval = device_register(&port->dev); + retval = device_add(&port->dev); if (retval) { dev_err(&port->dev, "Error registering port device, " "continuing\n"); @@ -1103,39 +1102,7 @@ return 0; probe_error: - for (i = 0; i < num_bulk_in; ++i) { - port = serial->port[i]; - if (!port) - continue; - usb_free_urb(port->read_urb); - kfree(port->bulk_in_buffer); - } - for (i = 0; i < num_bulk_out; ++i) { - port = serial->port[i]; - if (!port) - continue; - usb_free_urb(port->write_urb); - kfree(port->bulk_out_buffer); - } - for (i = 0; i < num_interrupt_in; ++i) { - port = serial->port[i]; - if (!port) - continue; - usb_free_urb(port->interrupt_in_urb); - kfree(port->interrupt_in_buffer); - } - for (i = 0; i < num_interrupt_out; ++i) { - port = serial->port[i]; - if (!port) - continue; - usb_free_urb(port->interrupt_out_urb); - kfree(port->interrupt_out_buffer); - } - - /* free up any memory that we allocated */ - for (i = 0; i < serial->num_port_pointers; ++i) - kfree(serial->port[i]); - kfree(serial); + usb_serial_put(serial); return -EIO; } EXPORT_SYMBOL_GPL(usb_serial_probe); @@ -1161,10 
+1128,7 @@ if (port) { struct tty_struct *tty = tty_port_tty_get(&port->port); if (tty) { - /* The hangup will occur asynchronously but - the object refcounts will sort out all the - cleanup */ - tty_hangup(tty); + tty_vhangup(tty); tty_kref_put(tty); } kill_traffic(port); @@ -1189,8 +1153,7 @@ } serial->type->disconnect(serial); - /* let the last holder of this object - * cause it to be cleaned up */ + /* let the last holder of this object cause it to be cleaned up */ usb_serial_put(serial); dev_info(dev, "device disconnected\n"); } @@ -1246,6 +1209,8 @@ .chars_in_buffer = serial_chars_in_buffer, .tiocmget = serial_tiocmget, .tiocmset = serial_tiocmset, + .shutdown = serial_release, + .install = serial_install, .proc_fops = &serial_proc_fops, }; --- linux-ec2-2.6.31.orig/drivers/usb/serial/ftdi_sio.c +++ linux-ec2-2.6.31/drivers/usb/serial/ftdi_sio.c @@ -76,13 +76,7 @@ unsigned long last_dtr_rts; /* saved modem control outputs */ wait_queue_head_t delta_msr_wait; /* Used for TIOCMIWAIT */ char prev_status, diff_status; /* Used for TIOCMIWAIT */ - __u8 rx_flags; /* receive state flags (throttling) */ - spinlock_t rx_lock; /* spinlock for receive state */ - struct delayed_work rx_work; struct usb_serial_port *port; - int rx_processed; - unsigned long rx_bytes; - __u16 interface; /* FT2232C, FT2232H or FT4232H port interface (0 for FT232/245) */ @@ -176,6 +170,9 @@ { USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RELAIS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_SNIFFER_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_THROTTLE_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GATEWAY_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_IOBOARD_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_MINI_IOBOARD_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SPROG_II) }, @@ -694,6 +691,8 @@ { USB_DEVICE(DE_VID, WHT_PID) }, { USB_DEVICE(ADI_VID, ADI_GNICE_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(ADI_VID, ADI_GNICEPLUS_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(JETI_VID, JETI_SPC1201_PID) }, { USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, @@ -702,6 +701,8 @@ { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, HAMEG_HO820_PID) }, + { USB_DEVICE(FTDI_VID, HAMEG_HO870_PID) }, { }, /* Optional parameter entry */ { } /* Terminating entry */ }; @@ -730,10 +731,6 @@ /* Constants for read urb and write urb */ #define BUFSZ 512 -/* rx_flags */ -#define THROTTLED 0x01 -#define ACTUALLY_THROTTLED 0x02 - /* Used for TIOCMIWAIT */ #define FTDI_STATUS_B0_MASK (FTDI_RS0_CTS | FTDI_RS0_DSR | FTDI_RS0_RI | FTDI_RS0_RLSD) #define FTDI_STATUS_B1_MASK (FTDI_RS_BI) @@ -757,7 +754,7 @@ static int ftdi_chars_in_buffer(struct tty_struct *tty); static void ftdi_write_bulk_callback(struct urb *urb); static void ftdi_read_bulk_callback(struct urb *urb); -static void ftdi_process_read(struct work_struct *work); +static void ftdi_process_read(struct usb_serial_port *port); static void ftdi_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); static int ftdi_tiocmget(struct tty_struct *tty, struct file *file); @@ -1228,7 +1225,6 @@ (new_serial.flags & ASYNC_FLAGS)); priv->custom_divisor = new_serial.custom_divisor; - tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 
1 : 0; write_latency_timer(port); check_and_exit: @@ -1521,7 +1517,6 @@ } kref_init(&priv->kref); - spin_lock_init(&priv->rx_lock); spin_lock_init(&priv->tx_lock); init_waitqueue_head(&priv->delta_msr_wait); /* This will push the characters through immediately rather @@ -1543,7 +1538,6 @@ port->read_urb->transfer_buffer_length = BUFSZ; } - INIT_DELAYED_WORK(&priv->rx_work, ftdi_process_read); priv->port = port; /* Free port's existing write urb and transfer buffer. */ @@ -1680,6 +1674,26 @@ return 0; } +static int ftdi_submit_read_urb(struct usb_serial_port *port, gfp_t mem_flags) +{ + struct urb *urb = port->read_urb; + struct usb_serial *serial = port->serial; + int result; + + usb_fill_bulk_urb(urb, serial->dev, + usb_rcvbulkpipe(serial->dev, + port->bulk_in_endpointAddress), + urb->transfer_buffer, + urb->transfer_buffer_length, + ftdi_read_bulk_callback, port); + result = usb_submit_urb(urb, mem_flags); + if (result) + dev_err(&port->dev, + "%s - failed submitting read urb, error %d\n", + __func__, result); + return result; +} + static int ftdi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) { /* ftdi_open */ @@ -1695,12 +1709,6 @@ spin_lock_irqsave(&priv->tx_lock, flags); priv->tx_bytes = 0; spin_unlock_irqrestore(&priv->tx_lock, flags); - spin_lock_irqsave(&priv->rx_lock, flags); - priv->rx_bytes = 0; - spin_unlock_irqrestore(&priv->rx_lock, flags); - - if (tty) - tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0; write_latency_timer(port); @@ -1720,23 +1728,14 @@ ftdi_set_termios(tty, port, tty->termios); /* Not throttled */ - spin_lock_irqsave(&priv->rx_lock, flags); - priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED); - spin_unlock_irqrestore(&priv->rx_lock, flags); + spin_lock_irqsave(&port->lock, flags); + port->throttled = 0; + port->throttle_req = 0; + spin_unlock_irqrestore(&port->lock, flags); /* Start reading from the device */ - priv->rx_processed = 0; - usb_fill_bulk_urb(port->read_urb, dev, - usb_rcvbulkpipe(dev, port->bulk_in_endpointAddress), - port->read_urb->transfer_buffer, - port->read_urb->transfer_buffer_length, - ftdi_read_bulk_callback, port); - result = usb_submit_urb(port->read_urb, GFP_KERNEL); - if (result) - dev_err(&port->dev, - "%s - failed submitting read urb, error %d\n", - __func__, result); - else + result = ftdi_submit_read_urb(port, GFP_KERNEL); + if (!result) kref_get(&priv->kref); return result; @@ -1782,10 +1781,6 @@ dbg("%s", __func__); - - /* cancel any scheduled reading */ - cancel_delayed_work_sync(&priv->rx_work); - /* shutdown our bulk read */ usb_kill_urb(port->read_urb); kref_put(&priv->kref, ftdi_sio_priv_release); @@ -1944,7 +1939,7 @@ return; } /* account for transferred data */ - countback = urb->actual_length; + countback = urb->transfer_buffer_length; data_offset = priv->write_offset; if (data_offset > 0) { /* Subtract the control bytes */ @@ -1957,7 +1952,6 @@ if (status) { dbg("nonzero write bulk status received: %d", status); - return; } usb_serial_port_softint(port); @@ -2008,271 +2002,121 @@ return buffered; } -static void ftdi_read_bulk_callback(struct urb *urb) +static int ftdi_process_packet(struct tty_struct *tty, + struct usb_serial_port *port, struct ftdi_private *priv, + char *packet, int len) { - struct usb_serial_port *port = urb->context; - struct tty_struct *tty; - struct ftdi_private *priv; - unsigned long countread; - unsigned long flags; - int status = urb->status; - - if (urb->number_of_packets > 0) { - dev_err(&port->dev, "%s transfer_buffer_length %d " - 
"actual_length %d number of packets %d\n", __func__, - urb->transfer_buffer_length, - urb->actual_length, urb->number_of_packets); - dev_err(&port->dev, "%s transfer_flags %x\n", __func__, - urb->transfer_flags); - } + int i; + char status; + char flag; + char *ch; dbg("%s - port %d", __func__, port->number); - if (port->port.count <= 0) - return; - - tty = tty_port_tty_get(&port->port); - if (!tty) { - dbg("%s - bad tty pointer - exiting", __func__); - return; + if (len < 2) { + dbg("malformed packet"); + return 0; } - priv = usb_get_serial_port_data(port); - if (!priv) { - dbg("%s - bad port private data pointer - exiting", __func__); - goto out; + /* Compare new line status to the old one, signal if different/ + N.B. packet may be processed more than once, but differences + are only processed once. */ + status = packet[0] & FTDI_STATUS_B0_MASK; + if (status != priv->prev_status) { + priv->diff_status |= status ^ priv->prev_status; + wake_up_interruptible(&priv->delta_msr_wait); + priv->prev_status = status; } - if (urb != port->read_urb) - dev_err(&port->dev, "%s - Not my urb!\n", __func__); - - if (status) { - /* This will happen at close every time so it is a dbg not an - err */ - dbg("(this is ok on close) nonzero read bulk status received: %d", status); - goto out; + /* + * Although the device uses a bitmask and hence can have multiple + * errors on a packet - the order here sets the priority the error is + * returned to the tty layer. + */ + flag = TTY_NORMAL; + if (packet[1] & FTDI_RS_OE) { + flag = TTY_OVERRUN; + dbg("OVERRRUN error"); + } + if (packet[1] & FTDI_RS_BI) { + flag = TTY_BREAK; + dbg("BREAK received"); + usb_serial_handle_break(port); + } + if (packet[1] & FTDI_RS_PE) { + flag = TTY_PARITY; + dbg("PARITY error"); + } + if (packet[1] & FTDI_RS_FE) { + flag = TTY_FRAME; + dbg("FRAMING error"); } - /* count data bytes, but not status bytes */ - countread = urb->actual_length; - countread -= 2 * DIV_ROUND_UP(countread, priv->max_packet_size); - spin_lock_irqsave(&priv->rx_lock, flags); - priv->rx_bytes += countread; - spin_unlock_irqrestore(&priv->rx_lock, flags); - - ftdi_process_read(&priv->rx_work.work); -out: - tty_kref_put(tty); -} /* ftdi_read_bulk_callback */ + len -= 2; + if (!len) + return 0; /* status only */ + ch = packet + 2; + if (!(port->console && port->sysrq) && flag == TTY_NORMAL) + tty_insert_flip_string(tty, ch, len); + else { + for (i = 0; i < len; i++, ch++) { + if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + tty_insert_flip_char(tty, *ch, flag); + } + } + return len; +} -static void ftdi_process_read(struct work_struct *work) -{ /* ftdi_process_read */ - struct ftdi_private *priv = - container_of(work, struct ftdi_private, rx_work.work); - struct usb_serial_port *port = priv->port; - struct urb *urb; +static void ftdi_process_read(struct usb_serial_port *port) +{ + struct urb *urb = port->read_urb; struct tty_struct *tty; - char error_flag; - unsigned char *data; - + struct ftdi_private *priv = usb_get_serial_port_data(port); + char *data = (char *)urb->transfer_buffer; int i; - int result; - int need_flip; - int packet_offset; - unsigned long flags; - - dbg("%s - port %d", __func__, port->number); - - if (port->port.count <= 0) - return; + int len; + int count = 0; tty = tty_port_tty_get(&port->port); - if (!tty) { - dbg("%s - bad tty pointer - exiting", __func__); + if (!tty) return; - } - priv = usb_get_serial_port_data(port); - if (!priv) { - dbg("%s - bad port private data pointer - exiting", __func__); - goto out; + for (i = 0; i < 
urb->actual_length; i += priv->max_packet_size) { + len = min_t(int, urb->actual_length - i, priv->max_packet_size); + count += ftdi_process_packet(tty, port, priv, &data[i], len); } - urb = port->read_urb; - if (!urb) { - dbg("%s - bad read_urb pointer - exiting", __func__); - goto out; - } - - data = urb->transfer_buffer; - - if (priv->rx_processed) { - dbg("%s - already processed: %d bytes, %d remain", __func__, - priv->rx_processed, - urb->actual_length - priv->rx_processed); - } else { - /* The first two bytes of every read packet are status */ - if (urb->actual_length > 2) - usb_serial_debug_data(debug, &port->dev, __func__, - urb->actual_length, data); - else - dbg("Status only: %03oo %03oo", data[0], data[1]); - } - - - /* TO DO -- check for hung up line and handle appropriately: */ - /* send hangup */ - /* See acm.c - you do a tty_hangup - eg tty_hangup(tty) */ - /* if CD is dropped and the line is not CLOCAL then we should hangup */ - - need_flip = 0; - for (packet_offset = priv->rx_processed; - packet_offset < urb->actual_length; packet_offset += priv->max_packet_size) { - int length; - - /* Compare new line status to the old one, signal if different/ - N.B. packet may be processed more than once, but differences - are only processed once. */ - char new_status = data[packet_offset + 0] & - FTDI_STATUS_B0_MASK; - if (new_status != priv->prev_status) { - priv->diff_status |= - new_status ^ priv->prev_status; - wake_up_interruptible(&priv->delta_msr_wait); - priv->prev_status = new_status; - } - - length = min_t(u32, priv->max_packet_size, urb->actual_length-packet_offset)-2; - if (length < 0) { - dev_err(&port->dev, "%s - bad packet length: %d\n", - __func__, length+2); - length = 0; - } - - if (priv->rx_flags & THROTTLED) { - dbg("%s - throttled", __func__); - break; - } - if (tty_buffer_request_room(tty, length) < length) { - /* break out & wait for throttling/unthrottling to - happen */ - dbg("%s - receive room low", __func__); - break; - } - - /* Handle errors and break */ - error_flag = TTY_NORMAL; - /* Although the device uses a bitmask and hence can have - multiple errors on a packet - the order here sets the - priority the error is returned to the tty layer */ - - if (data[packet_offset+1] & FTDI_RS_OE) { - error_flag = TTY_OVERRUN; - dbg("OVERRRUN error"); - } - if (data[packet_offset+1] & FTDI_RS_BI) { - error_flag = TTY_BREAK; - dbg("BREAK received"); - usb_serial_handle_break(port); - } - if (data[packet_offset+1] & FTDI_RS_PE) { - error_flag = TTY_PARITY; - dbg("PARITY error"); - } - if (data[packet_offset+1] & FTDI_RS_FE) { - error_flag = TTY_FRAME; - dbg("FRAMING error"); - } - if (length > 0) { - for (i = 2; i < length+2; i++) { - /* Note that the error flag is duplicated for - every character received since we don't know - which character it applied to */ - if (!usb_serial_handle_sysrq_char(tty, port, - data[packet_offset + i])) - tty_insert_flip_char(tty, - data[packet_offset + i], - error_flag); - } - need_flip = 1; - } - -#ifdef NOT_CORRECT_BUT_KEEPING_IT_FOR_NOW - /* if a parity error is detected you get status packets forever - until a character is sent without a parity error. - This doesn't work well since the application receives a - never ending stream of bad data - even though new data - hasn't been sent. Therefore I (bill) have taken this out. - However - this might make sense for framing errors and so on - so I am leaving the code in for now. 
- */ - else { - if (error_flag != TTY_NORMAL) { - dbg("error_flag is not normal"); - /* In this case it is just status - if that is - an error send a bad character */ - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - tty_flip_buffer_push(tty); - tty_insert_flip_char(tty, 0xff, error_flag); - need_flip = 1; - } - } -#endif - } /* "for(packet_offset=0..." */ - - /* Low latency */ - if (need_flip) + if (count) tty_flip_buffer_push(tty); + tty_kref_put(tty); +} - if (packet_offset < urb->actual_length) { - /* not completely processed - record progress */ - priv->rx_processed = packet_offset; - dbg("%s - incomplete, %d bytes processed, %d remain", - __func__, packet_offset, - urb->actual_length - packet_offset); - /* check if we were throttled while processing */ - spin_lock_irqsave(&priv->rx_lock, flags); - if (priv->rx_flags & THROTTLED) { - priv->rx_flags |= ACTUALLY_THROTTLED; - spin_unlock_irqrestore(&priv->rx_lock, flags); - dbg("%s - deferring remainder until unthrottled", - __func__); - goto out; - } - spin_unlock_irqrestore(&priv->rx_lock, flags); - /* if the port is closed stop trying to read */ - if (port->port.count > 0) - /* delay processing of remainder */ - schedule_delayed_work(&priv->rx_work, 1); - else - dbg("%s - port is closed", __func__); - goto out; - } - - /* urb is completely processed */ - priv->rx_processed = 0; +static void ftdi_read_bulk_callback(struct urb *urb) +{ + struct usb_serial_port *port = urb->context; + unsigned long flags; - /* if the port is closed stop trying to read */ - if (port->port.count > 0) { - /* Continue trying to always read */ - usb_fill_bulk_urb(port->read_urb, port->serial->dev, - usb_rcvbulkpipe(port->serial->dev, - port->bulk_in_endpointAddress), - port->read_urb->transfer_buffer, - port->read_urb->transfer_buffer_length, - ftdi_read_bulk_callback, port); + dbg("%s - port %d", __func__, port->number); - result = usb_submit_urb(port->read_urb, GFP_ATOMIC); - if (result) - dev_err(&port->dev, - "%s - failed resubmitting read urb, error %d\n", - __func__, result); + if (urb->status) { + dbg("%s - nonzero read bulk status received: %d", + __func__, urb->status); + return; } -out: - tty_kref_put(tty); -} /* ftdi_process_read */ + usb_serial_debug_data(debug, &port->dev, __func__, + urb->actual_length, urb->transfer_buffer); + ftdi_process_read(port); + + spin_lock_irqsave(&port->lock, flags); + port->throttled = port->throttle_req; + if (!port->throttled) { + spin_unlock_irqrestore(&port->lock, flags); + ftdi_submit_read_urb(port, GFP_ATOMIC); + } else + spin_unlock_irqrestore(&port->lock, flags); +} static void ftdi_break_ctl(struct tty_struct *tty, int break_state) { @@ -2604,33 +2448,31 @@ static void ftdi_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned long flags; dbg("%s - port %d", __func__, port->number); - spin_lock_irqsave(&priv->rx_lock, flags); - priv->rx_flags |= THROTTLED; - spin_unlock_irqrestore(&priv->rx_lock, flags); + spin_lock_irqsave(&port->lock, flags); + port->throttle_req = 1; + spin_unlock_irqrestore(&port->lock, flags); } - -static void ftdi_unthrottle(struct tty_struct *tty) +void ftdi_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - struct ftdi_private *priv = usb_get_serial_port_data(port); - int actually_throttled; + int was_throttled; unsigned long flags; dbg("%s - port %d", __func__, port->number); - spin_lock_irqsave(&priv->rx_lock, flags); - actually_throttled = 
priv->rx_flags & ACTUALLY_THROTTLED; - priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED); - spin_unlock_irqrestore(&priv->rx_lock, flags); - - if (actually_throttled) - schedule_delayed_work(&priv->rx_work, 0); + spin_lock_irqsave(&port->lock, flags); + was_throttled = port->throttled; + port->throttled = port->throttle_req = 0; + spin_unlock_irqrestore(&port->lock, flags); + + /* Resubmit urb if throttled and open. */ + if (was_throttled && test_bit(ASYNCB_INITIALIZED, &port->port.flags)) + ftdi_submit_read_urb(port, GFP_KERNEL); } static int __init ftdi_init(void) --- linux-ec2-2.6.31.orig/drivers/usb/serial/empeg.c +++ linux-ec2-2.6.31/drivers/usb/serial/empeg.c @@ -90,8 +90,7 @@ static void empeg_throttle(struct tty_struct *tty); static void empeg_unthrottle(struct tty_struct *tty); static int empeg_startup(struct usb_serial *serial); -static void empeg_set_termios(struct tty_struct *tty, - struct usb_serial_port *port, struct ktermios *old_termios); +static void empeg_init_termios(struct tty_struct *tty); static void empeg_write_bulk_callback(struct urb *urb); static void empeg_read_bulk_callback(struct urb *urb); @@ -123,7 +122,7 @@ .throttle = empeg_throttle, .unthrottle = empeg_unthrottle, .attach = empeg_startup, - .set_termios = empeg_set_termios, + .init_termios = empeg_init_termios, .write = empeg_write, .write_room = empeg_write_room, .chars_in_buffer = empeg_chars_in_buffer, @@ -150,9 +149,6 @@ dbg("%s - port %d", __func__, port->number); - /* Force default termio settings */ - empeg_set_termios(tty, port, NULL) ; - bytes_in = 0; bytes_out = 0; @@ -425,11 +421,9 @@ } -static void empeg_set_termios(struct tty_struct *tty, - struct usb_serial_port *port, struct ktermios *old_termios) +static void empeg_init_termios(struct tty_struct *tty) { struct ktermios *termios = tty->termios; - dbg("%s - port %d", __func__, port->number); /* * The empeg-car player wants these particular tty settings. 
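Note: the ftdi_sio hunks above drop the driver-private THROTTLED/ACTUALLY_THROTTLED state in favour of the generic throttle fields on struct usb_serial_port. Below is a minimal sketch of that handshake, assuming the 2.6.31-era fields the hunks themselves use (port->lock, port->throttled, port->throttle_req); every example_* identifier is illustrative only and not part of the patch.

#include <linux/spinlock.h>
#include <linux/tty.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

/* Resubmit the port's bulk-in URB; compare ftdi_submit_read_urb() above. */
static int example_submit_read_urb(struct usb_serial_port *port, gfp_t mem_flags)
{
	return usb_submit_urb(port->read_urb, mem_flags);
}

/* throttle: only record the request; the in-flight URB is not cancelled. */
static void example_throttle(struct tty_struct *tty)
{
	struct usb_serial_port *port = tty->driver_data;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	port->throttle_req = 1;
	spin_unlock_irqrestore(&port->lock, flags);
}

/* read completion: latch throttle_req into throttled and stop resubmitting
 * while throttled; the push of received data to the tty layer is elided. */
static void example_read_bulk_callback(struct urb *urb)
{
	struct usb_serial_port *port = urb->context;
	unsigned long flags;
	int stopped;

	spin_lock_irqsave(&port->lock, flags);
	stopped = port->throttled = port->throttle_req;
	spin_unlock_irqrestore(&port->lock, flags);
	if (!stopped)
		example_submit_read_urb(port, GFP_ATOMIC);
}

/* unthrottle: restart the read URB only if the callback really stopped. */
static void example_unthrottle(struct tty_struct *tty)
{
	struct usb_serial_port *port = tty->driver_data;
	unsigned long flags;
	int was_throttled;

	spin_lock_irqsave(&port->lock, flags);
	was_throttled = port->throttled;
	port->throttled = port->throttle_req = 0;
	spin_unlock_irqrestore(&port->lock, flags);
	if (was_throttled)
		example_submit_read_urb(port, GFP_KERNEL);
}

Because the completion handler itself decides whether to resubmit, a throttle request that races with a completing URB is never lost; this is the window the old ACTUALLY_THROTTLED flag and its deferred work only approximated.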
--- linux-ec2-2.6.31.orig/drivers/usb/serial/kobil_sct.c +++ linux-ec2-2.6.31/drivers/usb/serial/kobil_sct.c @@ -85,7 +85,7 @@ static void kobil_write_callback(struct urb *purb); static void kobil_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); - +static void kobil_init_termios(struct tty_struct *tty); static struct usb_device_id id_table [] = { { USB_DEVICE(KOBIL_VENDOR_ID, KOBIL_ADAPTER_B_PRODUCT_ID) }, @@ -120,6 +120,7 @@ .release = kobil_release, .ioctl = kobil_ioctl, .set_termios = kobil_set_termios, + .init_termios = kobil_init_termios, .tiocmget = kobil_tiocmget, .tiocmset = kobil_tiocmset, .open = kobil_open, @@ -210,6 +211,15 @@ kfree(usb_get_serial_port_data(serial->port[i])); } +static void kobil_init_termios(struct tty_struct *tty) +{ + /* Default to echo off and other sane device settings */ + tty->termios->c_lflag = 0; + tty->termios->c_lflag &= ~(ISIG | ICANON | ECHO | IEXTEN | XCASE); + tty->termios->c_iflag = IGNBRK | IGNPAR | IXOFF; + /* do NOT translate CR to CR-NL (0x0A -> 0x0A 0x0D) */ + tty->termios->c_oflag &= ~ONLCR; +} static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) @@ -226,16 +236,6 @@ /* someone sets the dev to 0 if the close method has been called */ port->interrupt_in_urb->dev = port->serial->dev; - if (tty) { - - /* Default to echo off and other sane device settings */ - tty->termios->c_lflag = 0; - tty->termios->c_lflag &= ~(ISIG | ICANON | ECHO | IEXTEN | - XCASE); - tty->termios->c_iflag = IGNBRK | IGNPAR | IXOFF; - /* do NOT translate CR to CR-NL (0x0A -> 0x0A 0x0D) */ - tty->termios->c_oflag &= ~ONLCR; - } /* allocate memory for transfer buffer */ transfer_buffer = kzalloc(transfer_buffer_length, GFP_KERNEL); if (!transfer_buffer) --- linux-ec2-2.6.31.orig/drivers/usb/serial/generic.c +++ linux-ec2-2.6.31/drivers/usb/serial/generic.c @@ -530,7 +530,7 @@ if (was_throttled) { /* Resume reading from device */ - usb_serial_generic_resubmit_read_urb(port, GFP_KERNEL); + flush_and_resubmit_read_urb(port); } } --- linux-ec2-2.6.31.orig/drivers/usb/serial/oti6858.c +++ linux-ec2-2.6.31/drivers/usb/serial/oti6858.c @@ -146,6 +146,7 @@ static void oti6858_close(struct usb_serial_port *port); static void oti6858_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); +static void oti6858_init_termios(struct tty_struct *tty); static int oti6858_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); static void oti6858_read_int_callback(struct urb *urb); @@ -186,6 +187,7 @@ .write = oti6858_write, .ioctl = oti6858_ioctl, .set_termios = oti6858_set_termios, + .init_termios = oti6858_init_termios, .tiocmget = oti6858_tiocmget, .tiocmset = oti6858_tiocmset, .read_bulk_callback = oti6858_read_bulk_callback, @@ -206,7 +208,6 @@ struct { u8 read_urb_in_use; u8 write_urb_in_use; - u8 termios_initialized; } flags; struct delayed_work delayed_write_work; @@ -447,6 +448,14 @@ return chars; } +static void oti6858_init_termios(struct tty_struct *tty) +{ + *(tty->termios) = tty_std_termios; + tty->termios->c_cflag = B38400 | CS8 | CREAD | HUPCL | CLOCAL; + tty->termios->c_ispeed = 38400; + tty->termios->c_ospeed = 38400; +} + static void oti6858_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { @@ -464,16 +473,6 @@ return; } - spin_lock_irqsave(&priv->lock, flags); - if (!priv->flags.termios_initialized) { - *(tty->termios) = tty_std_termios; - tty->termios->c_cflag = B38400 | CS8 | 
CREAD | HUPCL | CLOCAL; - tty->termios->c_ispeed = 38400; - tty->termios->c_ospeed = 38400; - priv->flags.termios_initialized = 1; - } - spin_unlock_irqrestore(&priv->lock, flags); - cflag = tty->termios->c_cflag; spin_lock_irqsave(&priv->lock, flags); --- linux-ec2-2.6.31.orig/drivers/usb/serial/pl2303.h +++ linux-ec2-2.6.31/drivers/usb/serial/pl2303.h @@ -130,3 +130,7 @@ /* Sony, USB data cable for CMD-Jxx mobile phones */ #define SONY_VENDOR_ID 0x054c #define SONY_QN3USB_PRODUCT_ID 0x0437 + +/* Sanwa KB-USB2 multimeter cable (ID: 11ad:0001) */ +#define SANWA_VENDOR_ID 0x11ad +#define SANWA_PRODUCT_ID 0x0001 --- linux-ec2-2.6.31.orig/drivers/usb/serial/cypress_m8.c +++ linux-ec2-2.6.31/drivers/usb/serial/cypress_m8.c @@ -659,15 +659,7 @@ spin_unlock_irqrestore(&priv->lock, flags); /* Set termios */ - result = cypress_write(tty, port, NULL, 0); - - if (result) { - dev_err(&port->dev, - "%s - failed setting the control lines - error %d\n", - __func__, result); - return result; - } else - dbg("%s - success setting the control lines", __func__); + cypress_send(port); if (tty) cypress_set_termios(tty, port, &priv->tmp_termios); @@ -1005,6 +997,8 @@ dbg("%s - port %d", __func__, port->number); spin_lock_irqsave(&priv->lock, flags); + /* We can't clean this one up as we don't know the device type + early enough */ if (!priv->termios_initialized) { if (priv->chiptype == CT_EARTHMATE) { *(tty->termios) = tty_std_termios; --- linux-ec2-2.6.31.orig/drivers/usb/serial/ipaq.c +++ linux-ec2-2.6.31/drivers/usb/serial/ipaq.c @@ -547,7 +547,6 @@ { USB_DEVICE(0x413C, 0x4009) }, /* Dell Axim USB Sync */ { USB_DEVICE(0x4505, 0x0010) }, /* Smartphone */ { USB_DEVICE(0x5E04, 0xCE00) }, /* SAGEM Wireless Assistant */ - { USB_DEVICE(0x0BB4, 0x00CF) }, /* HTC smartphone modems */ { } /* Terminating entry */ }; @@ -971,6 +970,15 @@ static int ipaq_startup(struct usb_serial *serial) { dbg("%s", __func__); + + /* Some of the devices in ipaq_id_table[] are composite, and we + * shouldn't bind to all the interfaces. This test will rule out + * some obviously invalid possibilities. + */ + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) + return -ENODEV; + if (serial->dev->actconfig->desc.bConfigurationValue != 1) { /* * FIXME: HP iPaq rx3715, possibly others, have 1 config that --- linux-ec2-2.6.31.orig/drivers/usb/serial/spcp8x5.c +++ linux-ec2-2.6.31/drivers/usb/serial/spcp8x5.c @@ -299,7 +299,6 @@ wait_queue_head_t delta_msr_wait; u8 line_control; u8 line_status; - u8 termios_initialized; }; /* desc : when device plug in,this function would be called. @@ -498,6 +497,15 @@ dev_dbg(&port->dev, "usb_unlink_urb(read_urb) = %d\n", result); } +static void spcp8x5_init_termios(struct tty_struct *tty) +{ + /* for the 1st time call this function */ + *(tty->termios) = tty_std_termios; + tty->termios->c_cflag = B115200 | CS8 | CREAD | HUPCL | CLOCAL; + tty->termios->c_ispeed = 115200; + tty->termios->c_ospeed = 115200; +} + /* set the serial param for transfer. we should check if we really need to * transfer. if we set flow control we should do this too. 
*/ static void spcp8x5_set_termios(struct tty_struct *tty, @@ -514,16 +522,6 @@ int i; u8 control; - /* for the 1st time call this function */ - spin_lock_irqsave(&priv->lock, flags); - if (!priv->termios_initialized) { - *(tty->termios) = tty_std_termios; - tty->termios->c_cflag = B115200 | CS8 | CREAD | HUPCL | CLOCAL; - tty->termios->c_ispeed = 115200; - tty->termios->c_ospeed = 115200; - priv->termios_initialized = 1; - } - spin_unlock_irqrestore(&priv->lock, flags); /* check that they really want us to change something */ if (!tty_termios_hw_change(tty->termios, old_termios)) @@ -1011,6 +1009,7 @@ .carrier_raised = spcp8x5_carrier_raised, .write = spcp8x5_write, .set_termios = spcp8x5_set_termios, + .init_termios = spcp8x5_init_termios, .ioctl = spcp8x5_ioctl, .tiocmget = spcp8x5_tiocmget, .tiocmset = spcp8x5_tiocmset, --- linux-ec2-2.6.31.orig/drivers/usb/serial/ftdi_sio.h +++ linux-ec2-2.6.31/drivers/usb/serial/ftdi_sio.h @@ -81,6 +81,9 @@ /* OpenDCC (www.opendcc.de) product id */ #define FTDI_OPENDCC_PID 0xBFD8 +#define FTDI_OPENDCC_SNIFFER_PID 0xBFD9 +#define FTDI_OPENDCC_THROTTLE_PID 0xBFDA +#define FTDI_OPENDCC_GATEWAY_PID 0xBFDB /* Sprog II (Andrew Crosland's SprogII DCC interface) */ #define FTDI_SPROG_II 0xF0C8 @@ -930,6 +933,7 @@ */ #define ADI_VID 0x0456 #define ADI_GNICE_PID 0xF000 +#define ADI_GNICEPLUS_PID 0xF001 /* * JETI SPECTROMETER SPECBOS 1201 @@ -968,6 +972,12 @@ #define MARVELL_OPENRD_PID 0x9e90 /* + * Hameg HO820 and HO870 interface (using VID 0x0403) + */ +#define HAMEG_HO820_PID 0xed74 +#define HAMEG_HO870_PID 0xed71 + +/* * BmRequestType: 1100 0000b * bRequest: FTDI_E2_READ * wValue: 0 --- linux-ec2-2.6.31.orig/drivers/usb/serial/console.c +++ linux-ec2-2.6.31/drivers/usb/serial/console.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,7 @@ char *s; struct usb_serial *serial; struct usb_serial_port *port; - int retval = 0; + int retval; struct tty_struct *tty = NULL; struct ktermios *termios = NULL, dummy; @@ -116,13 +117,17 @@ return -ENODEV; } - port = serial->port[0]; + retval = usb_autopm_get_interface(serial->interface); + if (retval) + goto error_get_interface; + + port = serial->port[co->index - serial->minor]; tty_port_tty_set(&port->port, NULL); info->port = port; ++port->port.count; - if (port->port.count == 1) { + if (!test_bit(ASYNCB_INITIALIZED, &port->port.flags)) { if (serial->type->set_termios) { /* * allocate a fake tty so the driver can initialize @@ -168,6 +173,7 @@ kfree(termios); kfree(tty); } + set_bit(ASYNCB_INITIALIZED, &port->port.flags); } /* Now that any required fake tty operations are completed restore * the tty port count */ @@ -175,18 +181,22 @@ /* The console is special in terms of closing the device so * indicate this port is now acting as a system console. 
*/ port->console = 1; - retval = 0; -out: + mutex_unlock(&serial->disc_mutex); return retval; -free_termios: + + free_termios: kfree(termios); tty_port_tty_set(&port->port, NULL); -free_tty: + free_tty: kfree(tty); -reset_open_count: + reset_open_count: port->port.count = 0; - goto out; + usb_autopm_put_interface(serial->interface); + error_get_interface: + usb_serial_put(serial); + mutex_unlock(&serial->disc_mutex); + return retval; } static void usb_console_write(struct console *co, --- linux-ec2-2.6.31.orig/drivers/usb/serial/ark3116.c +++ linux-ec2-2.6.31/drivers/usb/serial/ark3116.c @@ -35,11 +35,6 @@ }; MODULE_DEVICE_TABLE(usb, id_table); -struct ark3116_private { - spinlock_t lock; - u8 termios_initialized; -}; - static inline void ARK3116_SND(struct usb_serial *serial, int seq, __u8 request, __u8 requesttype, __u16 value, __u16 index) @@ -82,22 +77,11 @@ static int ark3116_attach(struct usb_serial *serial) { char *buf; - struct ark3116_private *priv; - int i; - - for (i = 0; i < serial->num_ports; ++i) { - priv = kzalloc(sizeof(struct ark3116_private), GFP_KERNEL); - if (!priv) - goto cleanup; - spin_lock_init(&priv->lock); - - usb_set_serial_port_data(serial->port[i], priv); - } buf = kmalloc(1, GFP_KERNEL); if (!buf) { dbg("error kmalloc -> out of mem?"); - goto cleanup; + return -ENOMEM; } /* 3 */ @@ -149,13 +133,16 @@ kfree(buf); return 0; +} -cleanup: - for (--i; i >= 0; --i) { - kfree(usb_get_serial_port_data(serial->port[i])); - usb_set_serial_port_data(serial->port[i], NULL); - } - return -ENOMEM; +static void ark3116_init_termios(struct tty_struct *tty) +{ + struct ktermios *termios = tty->termios; + *termios = tty_std_termios; + termios->c_cflag = B9600 | CS8 + | CREAD | HUPCL | CLOCAL; + termios->c_ispeed = 9600; + termios->c_ospeed = 9600; } static void ark3116_set_termios(struct tty_struct *tty, @@ -163,10 +150,8 @@ struct ktermios *old_termios) { struct usb_serial *serial = port->serial; - struct ark3116_private *priv = usb_get_serial_port_data(port); struct ktermios *termios = tty->termios; unsigned int cflag = termios->c_cflag; - unsigned long flags; int baud; int ark3116_baud; char *buf; @@ -176,16 +161,6 @@ dbg("%s - port %d", __func__, port->number); - spin_lock_irqsave(&priv->lock, flags); - if (!priv->termios_initialized) { - *termios = tty_std_termios; - termios->c_cflag = B9600 | CS8 - | CREAD | HUPCL | CLOCAL; - termios->c_ispeed = 9600; - termios->c_ospeed = 9600; - priv->termios_initialized = 1; - } - spin_unlock_irqrestore(&priv->lock, flags); cflag = termios->c_cflag; termios->c_cflag &= ~(CMSPAR|CRTSCTS); @@ -455,6 +430,7 @@ .num_ports = 1, .attach = ark3116_attach, .set_termios = ark3116_set_termios, + .init_termios = ark3116_init_termios, .ioctl = ark3116_ioctl, .tiocmget = ark3116_tiocmget, .open = ark3116_open, --- linux-ec2-2.6.31.orig/drivers/usb/serial/whiteheat.c +++ linux-ec2-2.6.31/drivers/usb/serial/whiteheat.c @@ -259,7 +259,7 @@ __u8 *data, __u8 datasize); static int firm_open(struct usb_serial_port *port); static int firm_close(struct usb_serial_port *port); -static int firm_setup_port(struct tty_struct *tty); +static void firm_setup_port(struct tty_struct *tty); static int firm_set_rts(struct usb_serial_port *port, __u8 onoff); static int firm_set_dtr(struct usb_serial_port *port, __u8 onoff); static int firm_set_break(struct usb_serial_port *port, __u8 onoff); @@ -1211,7 +1211,7 @@ } -static int firm_setup_port(struct tty_struct *tty) +static void firm_setup_port(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; 
struct whiteheat_port_settings port_settings; @@ -1286,7 +1286,7 @@ port_settings.lloop = 0; /* now send the message to the device */ - return firm_send_command(port, WHITEHEAT_SETUP_PORT, + firm_send_command(port, WHITEHEAT_SETUP_PORT, (__u8 *)&port_settings, sizeof(port_settings)); } --- linux-ec2-2.6.31.orig/drivers/usb/serial/cp210x.c +++ linux-ec2-2.6.31/drivers/usb/serial/cp210x.c @@ -51,6 +51,8 @@ static void cp210x_break_ctl(struct tty_struct *, int); static int cp210x_startup(struct usb_serial *); static void cp210x_disconnect(struct usb_serial *); +static void cp210x_dtr_rts(struct usb_serial_port *p, int on); +static int cp210x_carrier_raised(struct usb_serial_port *p); static int debug; @@ -114,6 +116,7 @@ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */ { } /* Terminating Entry */ }; @@ -143,6 +146,8 @@ .tiocmset = cp210x_tiocmset, .attach = cp210x_startup, .disconnect = cp210x_disconnect, + .dtr_rts = cp210x_dtr_rts, + .carrier_raised = cp210x_carrier_raised }; /* Config request types */ @@ -399,12 +404,6 @@ /* Configure the termios structure */ cp210x_get_termios(tty, port); - - /* Set the DTR and RTS pins low */ - cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data - : port, - NULL, TIOCM_DTR | TIOCM_RTS, 0); - return 0; } @@ -753,6 +752,14 @@ return cp210x_set_config(port, CP210X_SET_MHS, &control, 2); } +static void cp210x_dtr_rts(struct usb_serial_port *p, int on) +{ + if (on) + cp210x_tiocmset_port(p, NULL, TIOCM_DTR|TIOCM_RTS, 0); + else + cp210x_tiocmset_port(p, NULL, 0, TIOCM_DTR|TIOCM_RTS); +} + static int cp210x_tiocmget (struct tty_struct *tty, struct file *file) { struct usb_serial_port *port = tty->driver_data; @@ -775,6 +782,15 @@ return result; } +static int cp210x_carrier_raised(struct usb_serial_port *p) +{ + unsigned int control; + cp210x_get_config(p, CP210X_GET_MDMSTS, &control, 1); + if (control & CONTROL_DCD) + return 1; + return 0; +} + static void cp210x_break_ctl (struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; --- linux-ec2-2.6.31.orig/drivers/usb/musb/musb_gadget_ep0.c +++ linux-ec2-2.6.31/drivers/usb/musb/musb_gadget_ep0.c @@ -199,7 +199,6 @@ static void musb_g_ep0_giveback(struct musb *musb, struct usb_request *req) { musb_g_giveback(&musb->endpoints[0].ep_in, req, 0); - musb->ep0_state = MUSB_EP0_STAGE_SETUP; } /* @@ -647,7 +646,7 @@ musb->ep0_state = MUSB_EP0_STAGE_STATUSIN; break; default: - ERR("SetupEnd came in a wrong ep0stage %s", + ERR("SetupEnd came in a wrong ep0stage %s\n", decode_ep0stage(musb->ep0_state)); } csr = musb_readw(regs, MUSB_CSR0); @@ -770,12 +769,18 @@ handled = service_zero_data_request( musb, &setup); + /* + * We're expecting no data in any case, so + * always set the DATAEND bit -- doing this + * here helps avoid SetupEnd interrupt coming + * in the idle stage when we're stalling... 
+ */ + musb->ackpend |= MUSB_CSR0_P_DATAEND; + /* status stage might be immediate */ - if (handled > 0) { - musb->ackpend |= MUSB_CSR0_P_DATAEND; + if (handled > 0) musb->ep0_state = MUSB_EP0_STAGE_STATUSIN; - } break; /* sequence #1 (IN to host), includes GET_STATUS --- linux-ec2-2.6.31.orig/drivers/usb/musb/musb_gadget.c +++ linux-ec2-2.6.31/drivers/usb/musb/musb_gadget.c @@ -4,6 +4,7 @@ * Copyright 2005 Mentor Graphics Corporation * Copyright (C) 2005-2006 by Texas Instruments * Copyright (C) 2006-2007 Nokia Corporation + * Copyright (C) 2009 MontaVista Software, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -436,14 +437,6 @@ csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~MUSB_TXCSR_P_SENTSTALL; musb_writew(epio, MUSB_TXCSR, csr); - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - musb->dma_controller->channel_abort(dma); - } - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - break; } @@ -582,15 +575,25 @@ */ static void rxstate(struct musb *musb, struct musb_request *req) { - u16 csr = 0; const u8 epnum = req->epnum; struct usb_request *request = &req->request; struct musb_ep *musb_ep = &musb->endpoints[epnum].ep_out; void __iomem *epio = musb->endpoints[epnum].regs; unsigned fifo_count = 0; u16 len = musb_ep->packet_sz; + u16 csr = musb_readw(epio, MUSB_RXCSR); - csr = musb_readw(epio, MUSB_RXCSR); + /* We shouldn't get here while DMA is active, but we do... */ + if (dma_channel_status(musb_ep->dma) == MUSB_DMA_STATUS_BUSY) { + DBG(4, "DMA pending...\n"); + return; + } + + if (csr & MUSB_RXCSR_P_SENDSTALL) { + DBG(5, "%s stalling, RXCSR %04x\n", + musb_ep->end_point.name, csr); + return; + } if (is_cppi_enabled() && musb_ep->dma) { struct dma_controller *c = musb->dma_controller; @@ -761,19 +764,10 @@ csr, dma ? " (dma)" : "", request); if (csr & MUSB_RXCSR_P_SENTSTALL) { - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - (void) musb->dma_controller->channel_abort(dma); - request->actual += musb_ep->dma->actual_len; - } - csr |= MUSB_RXCSR_P_WZC_BITS; csr &= ~MUSB_RXCSR_P_SENTSTALL; musb_writew(epio, MUSB_RXCSR, csr); - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - goto done; + return; } if (csr & MUSB_RXCSR_P_OVERRUN) { @@ -795,7 +789,7 @@ DBG((csr & MUSB_RXCSR_DMAENAB) ? 4 : 1, "%s busy, csr %04x\n", musb_ep->end_point.name, csr); - goto done; + return; } if (dma && (csr & MUSB_RXCSR_DMAENAB)) { @@ -826,22 +820,15 @@ if ((request->actual < request->length) && (musb_ep->dma->actual_len == musb_ep->packet_sz)) - goto done; + return; #endif musb_g_giveback(musb_ep, request, 0); request = next_request(musb_ep); if (!request) - goto done; - - /* don't start more i/o till the stall clears */ - musb_ep_select(mbase, epnum); - csr = musb_readw(epio, MUSB_RXCSR); - if (csr & MUSB_RXCSR_P_SENDSTALL) - goto done; + return; } - /* analyze request if the ep is hot */ if (request) rxstate(musb, to_musb_request(request)); @@ -849,8 +836,6 @@ DBG(3, "packet waiting for %s%s request\n", musb_ep->desc ? 
"" : "inactive ", musb_ep->end_point.name); - -done: return; } @@ -1244,7 +1229,7 @@ void __iomem *mbase; unsigned long flags; u16 csr; - struct musb_request *request = NULL; + struct musb_request *request; int status = 0; if (!ep) @@ -1260,24 +1245,29 @@ musb_ep_select(mbase, epnum); - /* cannot portably stall with non-empty FIFO */ request = to_musb_request(next_request(musb_ep)); - if (value && musb_ep->is_in) { - csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) { - DBG(3, "%s fifo busy, cannot halt\n", ep->name); - spin_unlock_irqrestore(&musb->lock, flags); - return -EAGAIN; + if (value) { + if (request) { + DBG(3, "request in progress, cannot halt %s\n", + ep->name); + status = -EAGAIN; + goto done; + } + /* Cannot portably stall with non-empty FIFO */ + if (musb_ep->is_in) { + csr = musb_readw(epio, MUSB_TXCSR); + if (csr & MUSB_TXCSR_FIFONOTEMPTY) { + DBG(3, "FIFO busy, cannot halt %s\n", ep->name); + status = -EAGAIN; + goto done; + } } - } /* set/clear the stall and toggle bits */ DBG(2, "%s: %s stall\n", ep->name, value ? "set" : "clear"); if (musb_ep->is_in) { csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) - csr |= MUSB_TXCSR_FLUSHFIFO; csr |= MUSB_TXCSR_P_WZC_BITS | MUSB_TXCSR_CLRDATATOG; if (value) @@ -1300,14 +1290,13 @@ musb_writew(epio, MUSB_RXCSR, csr); } -done: - /* maybe start the first request in the queue */ if (!musb_ep->busy && !value && request) { DBG(3, "restarting the request\n"); musb_ep_restart(musb, request); } +done: spin_unlock_irqrestore(&musb->lock, flags); return status; } --- linux-ec2-2.6.31.orig/drivers/usb/core/usb.c +++ linux-ec2-2.6.31/drivers/usb/core/usb.c @@ -132,7 +132,7 @@ struct find_interface_arg { int minor; - struct usb_interface *interface; + struct device_driver *drv; }; static int __find_interface(struct device *dev, void *data) @@ -143,12 +143,10 @@ if (!is_usb_interface(dev)) return 0; + if (dev->driver != arg->drv) + return 0; intf = to_usb_interface(dev); - if (intf->minor != -1 && intf->minor == arg->minor) { - arg->interface = intf; - return 1; - } - return 0; + return intf->minor == arg->minor; } /** @@ -156,21 +154,24 @@ * @drv: the driver whose current configuration is considered * @minor: the minor number of the desired device * - * This walks the driver device list and returns a pointer to the interface - * with the matching minor. Note, this only works for devices that share the - * USB major number. + * This walks the bus device list and returns a pointer to the interface + * with the matching minor and driver. Note, this only works for devices + * that share the USB major number. */ struct usb_interface *usb_find_interface(struct usb_driver *drv, int minor) { struct find_interface_arg argb; - int retval; + struct device *dev; argb.minor = minor; - argb.interface = NULL; - /* eat the error, it will be in argb.interface */ - retval = driver_for_each_device(&drv->drvwrap.driver, NULL, &argb, - __find_interface); - return argb.interface; + argb.drv = &drv->drvwrap.driver; + + dev = bus_find_device(&usb_bus_type, NULL, &argb, __find_interface); + + /* Drop reference count from bus_find_device */ + put_device(dev); + + return dev ? 
to_usb_interface(dev) : NULL; } EXPORT_SYMBOL_GPL(usb_find_interface); --- linux-ec2-2.6.31.orig/drivers/usb/core/quirks.c +++ linux-ec2-2.6.31/drivers/usb/core/quirks.c @@ -74,6 +74,10 @@ /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, +#ifdef CONFIG_X86_LPIA + /* ASIX Ethernet Device */ + { USB_DEVICE(0x0b95, 0x1720), .driver_info = USB_QUIRK_RESET_RESUME }, +#endif { } /* terminating entry must be last */ }; --- linux-ec2-2.6.31.orig/drivers/usb/core/hub.c +++ linux-ec2-2.6.31/drivers/usb/core/hub.c @@ -439,7 +439,7 @@ static inline int hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) { - return usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), + return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); } @@ -613,7 +613,7 @@ * time later khubd will disconnect() any existing usb_device on the port * and will re-enumerate if there actually is a device attached. */ -static void hub_port_logical_disconnect(struct usb_hub *hub, int port1) +void hub_port_logical_disconnect(struct usb_hub *hub, int port1) { dev_dbg(hub->intfdev, "logical disconnect on port %d\n", port1); hub_port_disable(hub, port1, 1); @@ -630,6 +630,7 @@ set_bit(port1, hub->change_bits); kick_khubd(hub); } +EXPORT_SYMBOL(hub_port_logical_disconnect); enum hub_activation_type { HUB_INIT, HUB_INIT2, HUB_INIT3, --- linux-ec2-2.6.31.orig/drivers/usb/core/config.c +++ linux-ec2-2.6.31/drivers/usb/core/config.c @@ -105,7 +105,7 @@ ep->ss_ep_comp->extralen = i; buffer += i; size -= i; - retval = buffer - buffer_start + i; + retval = buffer - buffer_start; if (num_skipped > 0) dev_dbg(ddev, "skipped %d descriptor%s after %s\n", num_skipped, plural(num_skipped), --- linux-ec2-2.6.31.orig/drivers/usb/core/devio.c +++ linux-ec2-2.6.31/drivers/usb/core/devio.c @@ -1262,14 +1262,11 @@ } } - free_async(as); - if (put_user(addr, (void __user * __user *)arg)) return -EFAULT; return 0; err_out: - free_async(as); return -EFAULT; } @@ -1299,8 +1296,11 @@ static int proc_reapurb(struct dev_state *ps, void __user *arg) { struct async *as = reap_as(ps); - if (as) - return processcompl(as, (void __user * __user *)arg); + if (as) { + int retval = processcompl(as, (void __user * __user *)arg); + free_async(as); + return retval; + } if (signal_pending(current)) return -EINTR; return -EIO; @@ -1308,11 +1308,16 @@ static int proc_reapurbnonblock(struct dev_state *ps, void __user *arg) { + int retval; struct async *as; - if (!(as = async_getcompleted(ps))) - return -EAGAIN; - return processcompl(as, (void __user * __user *)arg); + as = async_getcompleted(ps); + retval = -EAGAIN; + if (as) { + retval = processcompl(as, (void __user * __user *)arg); + free_async(as); + } + return retval; } #ifdef CONFIG_COMPAT @@ -1385,7 +1390,6 @@ } } - free_async(as); if (put_user(ptr_to_compat(addr), (u32 __user *)arg)) return -EFAULT; return 0; @@ -1394,8 +1398,11 @@ static int proc_reapurb_compat(struct dev_state *ps, void __user *arg) { struct async *as = reap_as(ps); - if (as) - return processcompl_compat(as, (void __user * __user *)arg); + if (as) { + int retval = processcompl_compat(as, (void __user * __user *)arg); + free_async(as); + return retval; + } if (signal_pending(current)) return -EINTR; return -EIO; @@ -1403,11 +1410,16 @@ static int proc_reapurbnonblock_compat(struct dev_state *ps, void __user *arg) { + int retval; struct async *as; - if (!(as = async_getcompleted(ps))) - return -EAGAIN; - return processcompl_compat(as, (void __user 
* __user *)arg); + retval = -EAGAIN; + as = async_getcompleted(ps); + if (as) { + retval = processcompl_compat(as, (void __user * __user *)arg); + free_async(as); + } + return retval; } #endif --- linux-ec2-2.6.31.orig/drivers/usb/core/driver.c +++ linux-ec2-2.6.31/drivers/usb/core/driver.c @@ -976,6 +976,11 @@ return status; } +#ifdef CONFIG_X86_LPIA +struct usb_hub; +void hub_port_logical_disconnect(struct usb_hub *hub, int port1); +#endif + /* Caller has locked intf's usb_device's pm_mutex */ static int usb_resume_interface(struct usb_device *udev, struct usb_interface *intf, pm_message_t msg, int reset_resume) @@ -1015,9 +1020,19 @@ dev_err(&intf->dev, "%s error %d\n", "reset_resume", status); } else { +#ifdef CONFIG_X86_LPIA + struct usb_device *udev = interface_to_usbdev(intf); + struct usb_device *pdev = udev->parent; +#endif intf->needs_binding = 1; dev_warn(&intf->dev, "no %s for driver %s?\n", "reset_resume", driver->name); +#ifdef CONFIG_X86_LPIA + if (pdev) { + struct usb_hub *phub = usb_get_intfdata(pdev->actconfig->interface[0]); + hub_port_logical_disconnect(phub, udev->portnum); + } +#endif } } else { if (driver->resume) { --- linux-ec2-2.6.31.orig/drivers/usb/class/cdc-acm.c +++ linux-ec2-2.6.31/drivers/usb/class/cdc-acm.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -608,8 +609,9 @@ acm->throttle = 0; - tasklet_schedule(&acm->urb_task); + set_bit(ASYNCB_INITIALIZED, &acm->port.flags); rv = tty_port_block_til_ready(&acm->port, tty, filp); + tasklet_schedule(&acm->urb_task); done: mutex_unlock(&acm->mutex); err_out: @@ -858,10 +860,7 @@ if (!ACM_READY(acm)) return; - /* FIXME: Needs to support the tty_baud interface */ - /* FIXME: Broken on sparc */ - newline.dwDTERate = cpu_to_le32p(acm_tty_speed + - (termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0)); + newline.dwDTERate = cpu_to_le32(tty_get_baud_rate(tty)); newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0; newline.bParityType = termios->c_cflag & PARENB ? (termios->c_cflag & PARODD ? 
1 : 2) + --- linux-ec2-2.6.31.orig/drivers/usb/class/usbtmc.c +++ linux-ec2-2.6.31/drivers/usb/class/usbtmc.c @@ -367,13 +367,13 @@ { struct usbtmc_device_data *data; struct device *dev; - unsigned long int n_characters; + u32 n_characters; u8 *buffer; int actual; - int done; - int remaining; + size_t done; + size_t remaining; int retval; - int this_part; + size_t this_part; /* Get pointer to private data structure */ data = filp->private_data; @@ -455,6 +455,18 @@ (buffer[6] << 16) + (buffer[7] << 24); + /* Ensure the instrument doesn't lie about it */ + if (n_characters > actual - 12) { + dev_err(dev, "Device lies about message size: %u > %d\n", n_characters, actual - 12); + n_characters = actual - 12; + } + + /* Ensure the instrument doesn't send more back than requested */ + if (n_characters > this_part) { + dev_err(dev, "Device returns more than requested: %zu > %zu\n", done + n_characters, done + this_part); + n_characters = this_part; + } + /* Copy buffer to user space */ if (copy_to_user(buf + done, &buffer[12], n_characters)) { /* There must have been an addressing problem */ @@ -465,6 +477,8 @@ done += n_characters; if (n_characters < USBTMC_SIZE_IOBUFFER) remaining = 0; + else + remaining -= n_characters; } /* Update file position value */ @@ -531,10 +545,16 @@ n_bytes = roundup(12 + this_part, 4); memset(buffer + 12 + this_part, 0, n_bytes - (12 + this_part)); - retval = usb_bulk_msg(data->usb_dev, - usb_sndbulkpipe(data->usb_dev, - data->bulk_out), - buffer, n_bytes, &actual, USBTMC_TIMEOUT); + do { + retval = usb_bulk_msg(data->usb_dev, + usb_sndbulkpipe(data->usb_dev, + data->bulk_out), + buffer, n_bytes, + &actual, USBTMC_TIMEOUT); + if (retval != 0) + break; + n_bytes -= actual; + } while (n_bytes); data->bTag_last_write = data->bTag; data->bTag++; --- linux-ec2-2.6.31.orig/drivers/usb/class/cdc-wdm.c +++ linux-ec2-2.6.31/drivers/usb/class/cdc-wdm.c @@ -313,8 +313,13 @@ r = usb_autopm_get_interface(desc->intf); if (r < 0) goto outnp; - r = wait_event_interruptible(desc->wait, !test_bit(WDM_IN_USE, - &desc->flags)); + + if (!(file->f_flags & O_NONBLOCK)) + r = wait_event_interruptible(desc->wait, !test_bit(WDM_IN_USE, + &desc->flags)); + else + if (test_bit(WDM_IN_USE, &desc->flags)) + r = -EAGAIN; if (r < 0) goto out; @@ -377,7 +382,7 @@ static ssize_t wdm_read (struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - int rv, cntr; + int rv, cntr = 0; int i = 0; struct wdm_device *desc = file->private_data; @@ -389,10 +394,23 @@ if (desc->length == 0) { desc->read = 0; retry: + if (test_bit(WDM_DISCONNECTING, &desc->flags)) { + rv = -ENODEV; + goto err; + } i++; - rv = wait_event_interruptible(desc->wait, - test_bit(WDM_READ, &desc->flags)); + if (file->f_flags & O_NONBLOCK) { + if (!test_bit(WDM_READ, &desc->flags)) { + rv = cntr ? cntr : -EAGAIN; + goto err; + } + rv = 0; + } else { + rv = wait_event_interruptible(desc->wait, + test_bit(WDM_READ, &desc->flags)); + } + /* may have happened while we slept */ if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto err; @@ -448,7 +466,7 @@ err: mutex_unlock(&desc->rlock); - if (rv < 0) + if (rv < 0 && rv != -EAGAIN) dev_err(&desc->intf->dev, "wdm_read: exit error\n"); return rv; } --- linux-ec2-2.6.31.orig/drivers/usb/storage/usb.c +++ linux-ec2-2.6.31/drivers/usb/storage/usb.c @@ -228,6 +228,7 @@ if (data_len<36) // You lose. return; + memset(data+8, ' ', 28); if(data[0]&0x20) { /* USB device currently not connected.
Return peripheral qualifier 001b ("...however, the physical device is not currently connected @@ -237,15 +238,15 @@ device, it may return zeros or ASCII spaces (20h) in those fields until the data is available from the device."). */ - memset(data+8,0,28); } else { u16 bcdDevice = le16_to_cpu(us->pusb_dev->descriptor.bcdDevice); - memcpy(data+8, us->unusual_dev->vendorName, - strlen(us->unusual_dev->vendorName) > 8 ? 8 : - strlen(us->unusual_dev->vendorName)); - memcpy(data+16, us->unusual_dev->productName, - strlen(us->unusual_dev->productName) > 16 ? 16 : - strlen(us->unusual_dev->productName)); + int n; + + n = strlen(us->unusual_dev->vendorName); + memcpy(data+8, us->unusual_dev->vendorName, min(8, n)); + n = strlen(us->unusual_dev->productName); + memcpy(data+16, us->unusual_dev->productName, min(16, n)); + data[32] = 0x30 + ((bcdDevice>>12) & 0x0F); data[33] = 0x30 + ((bcdDevice>>8) & 0x0F); data[34] = 0x30 + ((bcdDevice>>4) & 0x0F); @@ -459,6 +460,9 @@ case 'a': f |= US_FL_SANE_SENSE; break; + case 'b': + f |= US_FL_BAD_SENSE; + break; case 'c': f |= US_FL_FIX_CAPACITY; break; --- linux-ec2-2.6.31.orig/drivers/usb/storage/initializers.c +++ linux-ec2-2.6.31/drivers/usb/storage/initializers.c @@ -102,5 +102,5 @@ USB_TYPE_STANDARD | USB_RECIP_DEVICE, 0x01, 0x0, NULL, 0x0, 1000); US_DEBUGP("Huawei mode set result is %d\n", result); - return (result ? 0 : -ENODEV); + return 0; } --- linux-ec2-2.6.31.orig/drivers/usb/storage/transport.c +++ linux-ec2-2.6.31/drivers/usb/storage/transport.c @@ -666,10 +666,11 @@ * to wait for at least one CHECK_CONDITION to determine * SANE_SENSE support */ - if ((srb->cmnd[0] == ATA_16 || srb->cmnd[0] == ATA_12) && + if (unlikely((srb->cmnd[0] == ATA_16 || srb->cmnd[0] == ATA_12) && result == USB_STOR_TRANSPORT_GOOD && !(us->fflags & US_FL_SANE_SENSE) && - !(srb->cmnd[2] & 0x20)) { + !(us->fflags & US_FL_BAD_SENSE) && + !(srb->cmnd[2] & 0x20))) { US_DEBUGP("-- SAT supported, increasing auto-sense\n"); us->fflags |= US_FL_SANE_SENSE; } @@ -696,7 +697,7 @@ /* device supports and needs bigger sense buffer */ if (us->fflags & US_FL_SANE_SENSE) sense_size = ~0; - +Retry_Sense: US_DEBUGP("Issuing auto-REQUEST_SENSE\n"); scsi_eh_prep_cmnd(srb, &ses, NULL, 0, sense_size); @@ -718,8 +719,30 @@ if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { US_DEBUGP("-- auto-sense aborted\n"); srb->result = DID_ABORT << 16; + + /* If SANE_SENSE caused this problem, disable it */ + if (sense_size != US_SENSE_SIZE) { + us->fflags &= ~US_FL_SANE_SENSE; + us->fflags |= US_FL_BAD_SENSE; + } goto Handle_Errors; } + + /* Some devices claim to support larger sense but fail when + * trying to request it. When a transport failure happens + * using US_FL_SANE_SENSE, we always retry with a standard + * (small) sense request.
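The Retry_Sense path above is a degrade-and-retry policy: try the large SANE_SENSE request once and, on a transport failure, latch a sticky bad-sense flag so every later request uses the safe small size. A minimal user-space sketch of that policy follows; issue_sense() and the two sizes are illustrative stand-ins, not the usb-storage API.

#include <stdio.h>
#include <stdbool.h>

enum { SENSE_SMALL = 18, SENSE_LARGE = 96 };	/* illustrative sizes only */

static bool bad_sense;	/* sticky: device chokes on large sense requests */

/* stand-in for the real transport call; this fake one rejects big sizes */
static int issue_sense(int size)
{
	return size > SENSE_SMALL ? -1 : 0;
}

static int get_sense(void)
{
	int size = bad_sense ? SENSE_SMALL : SENSE_LARGE;
	int rc = issue_sense(size);

	if (rc != 0 && size != SENSE_SMALL) {
		bad_sense = true;		/* remember the failure... */
		rc = issue_sense(SENSE_SMALL);	/* ...and retry once, small */
	}
	return rc;
}

int main(void)
{
	printf("first call: %d, bad_sense now %d\n", get_sense(), bad_sense);
	printf("second call: %d\n", get_sense());
	return 0;
}

Once the flag is set, the large path is never tried again for that device, which is exactly the behaviour the US_FL_BAD_SENSE flag encodes in the patch.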
This fixes some USB GSM modems + */ + if (temp_result == USB_STOR_TRANSPORT_FAILED && + sense_size != US_SENSE_SIZE) { + US_DEBUGP("-- auto-sense failure, retry small sense\n"); + sense_size = US_SENSE_SIZE; + us->fflags &= ~US_FL_SANE_SENSE; + us->fflags |= US_FL_BAD_SENSE; + goto Retry_Sense; + } + + /* Other failures */ if (temp_result != USB_STOR_TRANSPORT_GOOD) { US_DEBUGP("-- auto-sense failure\n"); @@ -739,6 +762,7 @@ */ if (srb->sense_buffer[7] > (US_SENSE_SIZE - 8) && !(us->fflags & US_FL_SANE_SENSE) && + !(us->fflags & US_FL_BAD_SENSE) && (srb->sense_buffer[0] & 0x7C) == 0x70) { US_DEBUGP("-- SANE_SENSE support enabled\n"); us->fflags |= US_FL_SANE_SENSE; @@ -768,17 +792,32 @@ /* set the result so the higher layers expect this data */ srb->result = SAM_STAT_CHECK_CONDITION; - /* If things are really okay, then let's show that. Zero - * out the sense buffer so the higher layers won't realize - * we did an unsolicited auto-sense. */ - if (result == USB_STOR_TRANSPORT_GOOD && - /* Filemark 0, ignore EOM, ILI 0, no sense */ + /* We often get empty sense data. This could indicate that + * everything worked or that there was an unspecified + * problem. We have to decide which. + */ + if ( /* Filemark 0, ignore EOM, ILI 0, no sense */ (srb->sense_buffer[2] & 0xaf) == 0 && /* No ASC or ASCQ */ srb->sense_buffer[12] == 0 && srb->sense_buffer[13] == 0) { - srb->result = SAM_STAT_GOOD; - srb->sense_buffer[0] = 0x0; + + /* If things are really okay, then let's show that. + * Zero out the sense buffer so the higher layers + * won't realize we did an unsolicited auto-sense. + */ + if (result == USB_STOR_TRANSPORT_GOOD) { + srb->result = SAM_STAT_GOOD; + srb->sense_buffer[0] = 0x0; + + /* If there was a problem, report an unspecified + * hardware error to prevent the higher layers from + * entering an infinite retry loop. + */ + } else { + srb->result = DID_ERROR << 16; + srb->sense_buffer[2] = HARDWARE_ERROR; + } } } --- linux-ec2-2.6.31.orig/drivers/usb/storage/onetouch.c +++ linux-ec2-2.6.31/drivers/usb/storage/onetouch.c @@ -163,7 +163,7 @@ usb_kill_urb(onetouch->irq); break; case US_RESUME: - if (usb_submit_urb(onetouch->irq, GFP_KERNEL) != 0) + if (usb_submit_urb(onetouch->irq, GFP_NOIO) != 0) dev_err(&onetouch->irq->dev->dev, "usb_submit_urb failed\n"); break; --- linux-ec2-2.6.31.orig/drivers/usb/storage/unusual_devs.h +++ linux-ec2-2.6.31/drivers/usb/storage/unusual_devs.h @@ -838,6 +838,13 @@ US_SC_DEVICE, US_PR_DEVICE, NULL, US_FL_FIX_CAPACITY ), +/* Reported by Daniel Kukula */ +UNUSUAL_DEV( 0x067b, 0x1063, 0x0100, 0x0100, + "Prolific Technology, Inc.", + "Prolific Storage Gadget", + US_SC_DEVICE, US_PR_DEVICE, NULL, + US_FL_BAD_SENSE ), + /* Reported by Rogerio Brito */ UNUSUAL_DEV( 0x067b, 0x2317, 0x0001, 0x001, "Prolific Technology, Inc.", @@ -1149,6 +1156,13 @@ US_SC_DEVICE, US_PR_DEVICE, option_ms_init, 0), +/* Reported by Timo Aaltonen */ +UNUSUAL_DEV( 0x0af0, 0x7011, 0x0000, 0x9999, + "Option", + "Mass Storage", + US_SC_DEVICE, US_PR_DEVICE, option_ms_init, + 0 ), + /* Reported by F. Aben * This device (wrongly) has a vendor-specific device descriptor. 
* The entry is needed so usb-storage can bind to it's mass-storage --- linux-ec2-2.6.31.orig/drivers/usb/misc/emi62.c +++ linux-ec2-2.6.31/drivers/usb/misc/emi62.c @@ -167,7 +167,7 @@ err("%s - error loading firmware: error = %d", __func__, err); goto wraperr; } - } while (i > 0); + } while (rec); /* Assert reset (stop the CPU in the EMI) */ err = emi62_set_reset(dev,1); --- linux-ec2-2.6.31.orig/drivers/usb/misc/appledisplay.c +++ linux-ec2-2.6.31/drivers/usb/misc/appledisplay.c @@ -72,8 +72,8 @@ struct usb_device *udev; /* usb device */ struct urb *urb; /* usb request block */ struct backlight_device *bd; /* backlight device */ - char *urbdata; /* interrupt URB data buffer */ - char *msgdata; /* control message data buffer */ + u8 *urbdata; /* interrupt URB data buffer */ + u8 *msgdata; /* control message data buffer */ struct delayed_work work; int button_pressed; --- linux-ec2-2.6.31.orig/drivers/mtd/ofpart.c +++ linux-ec2-2.6.31/drivers/mtd/ofpart.c @@ -46,21 +46,12 @@ const u32 *reg; int len; - /* check if this is a partition node */ - partname = of_get_property(pp, "name", &len); - if (strcmp(partname, "partition") != 0) { + reg = of_get_property(pp, "reg", &len); + if (!reg) { nr_parts--; continue; } - reg = of_get_property(pp, "reg", &len); - if (!reg || (len != 2 * sizeof(u32))) { - of_node_put(pp); - dev_err(dev, "Invalid 'reg' on %s\n", node->full_name); - kfree(*pparts); - *pparts = NULL; - return -EINVAL; - } (*pparts)[i].offset = reg[0]; (*pparts)[i].size = reg[1]; @@ -75,6 +66,14 @@ i++; } + if (!i) { + of_node_put(pp); + dev_err(dev, "No valid partition found on %s\n", node->full_name); + kfree(*pparts); + *pparts = NULL; + return -EINVAL; + } + return nr_parts; } EXPORT_SYMBOL(of_mtd_parse_partitions); --- linux-ec2-2.6.31.orig/drivers/mtd/nand/ndfc.c +++ linux-ec2-2.6.31/drivers/mtd/nand/ndfc.c @@ -102,8 +102,8 @@ wmb(); ecc = in_be32(ndfc->ndfcbase + NDFC_ECC); /* The NDFC uses Smart Media (SMC) bytes order */ - ecc_code[0] = p[2]; - ecc_code[1] = p[1]; + ecc_code[0] = p[1]; + ecc_code[1] = p[2]; ecc_code[2] = p[3]; return 0; --- linux-ec2-2.6.31.orig/drivers/mtd/chips/cfi_cmdset_0002.c +++ linux-ec2-2.6.31/drivers/mtd/chips/cfi_cmdset_0002.c @@ -282,16 +282,6 @@ } } -static void fixup_M29W128G_write_buffer(struct mtd_info *mtd, void *param) -{ - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; - if (cfi->cfiq->BufWriteTimeoutTyp) { - pr_warning("Don't use write buffer on ST flash M29W128G\n"); - cfi->cfiq->BufWriteTimeoutTyp = 0; - } -} - static struct cfi_fixup cfi_fixup_table[] = { { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL }, #ifdef AMD_BOOTLOC_BUG @@ -308,7 +298,6 @@ { CFI_MFR_AMD, 0x1301, fixup_s29gl064n_sectors, NULL, }, { CFI_MFR_AMD, 0x1a00, fixup_s29gl032n_sectors, NULL, }, { CFI_MFR_AMD, 0x1a01, fixup_s29gl032n_sectors, NULL, }, - { CFI_MFR_ST, 0x227E, fixup_M29W128G_write_buffer, NULL, }, #if !FORCE_WORD_WRITE { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, }, #endif --- linux-ec2-2.6.31.orig/drivers/mtd/chips/cfi_util.c +++ linux-ec2-2.6.31/drivers/mtd/chips/cfi_util.c @@ -81,6 +81,10 @@ { cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); + /* M29W128G flashes require an additional reset command + when exit qry mode */ + if ((cfi->mfr == CFI_MFR_ST) && (cfi->id == 0x227E || cfi->id == 0x7E)) + cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); } EXPORT_SYMBOL_GPL(cfi_qry_mode_off); --- 
linux-ec2-2.6.31.orig/drivers/scsi/hosts.c +++ linux-ec2-2.6.31/drivers/scsi/hosts.c @@ -180,14 +180,20 @@ EXPORT_SYMBOL(scsi_remove_host); /** - * scsi_add_host - add a scsi host + * scsi_add_host_with_dma - add a scsi host with dma device * @shost: scsi host pointer to add * @dev: a struct device of type scsi class + * @dma_dev: dma device for the host + * + * Note: You rarely need to worry about this unless you're in a + * virtualised host environment, so most drivers should use the + * simpler scsi_add_host() function instead. * * Return value: * 0 on success / != 0 for error **/ -int scsi_add_host(struct Scsi_Host *shost, struct device *dev) +int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, + struct device *dma_dev) { struct scsi_host_template *sht = shost->hostt; int error = -EINVAL; @@ -207,6 +213,7 @@ if (!shost->shost_gendev.parent) shost->shost_gendev.parent = dev ? dev : &platform_bus; + shost->dma_dev = dma_dev; error = device_add(&shost->shost_gendev); if (error) @@ -262,7 +269,7 @@ fail: return error; } -EXPORT_SYMBOL(scsi_add_host); +EXPORT_SYMBOL(scsi_add_host_with_dma); static void scsi_host_dev_release(struct device *dev) { --- linux-ec2-2.6.31.orig/drivers/scsi/libsrp.c +++ linux-ec2-2.6.31/drivers/scsi/libsrp.c @@ -124,6 +124,7 @@ dma_free_coherent(dev, size, ring[i]->buf, ring[i]->dma); kfree(ring[i]); } + kfree(ring); } int srp_target_alloc(struct srp_target *target, struct device *dev, --- linux-ec2-2.6.31.orig/drivers/scsi/gdth.c +++ linux-ec2-2.6.31/drivers/scsi/gdth.c @@ -2900,7 +2900,7 @@ eindex = handle; estr->event_source = 0; - if (eindex >= MAX_EVENTS) { + if (eindex < 0 || eindex >= MAX_EVENTS) { spin_unlock_irqrestore(&ha->smp_lock, flags); return eindex; } --- linux-ec2-2.6.31.orig/drivers/scsi/dpt_i2o.c +++ linux-ec2-2.6.31/drivers/scsi/dpt_i2o.c @@ -1918,6 +1918,10 @@ } size = size>>16; size *= 4; + if (size > MAX_MESSAGE_SIZE) { + rcode = -EINVAL; + goto cleanup; + } /* Copy in the user's I2O command */ if (copy_from_user (msg, user_msg, size)) { rcode = -EFAULT; --- linux-ec2-2.6.31.orig/drivers/scsi/scsi_transport_fc.c +++ linux-ec2-2.6.31/drivers/scsi/scsi_transport_fc.c @@ -473,10 +473,30 @@ MODULE_PARM_DESC(dev_loss_tmo, "Maximum number of seconds that the FC transport should" " insulate the loss of a remote port. Once this value is" - " exceeded, the scsi target is removed. Value should be" + " exceeded, the scsi target may be removed. See the" + " remove_on_dev_loss module parameter. Value should be" " between 1 and SCSI_DEVICE_BLOCK_MAX_TIMEOUT."); /* + * remove_on_dev_loss: controls whether the transport will + * remove a scsi target after the device loss timer expires. + * Removal on disconnect is modeled after the USB subsystem + * and expects subsystems layered on SCSI to be aware of + * potential device loss and handle it appropriately. However, + * many subsystems do not support device removal, leaving situations + * where structure references may remain, causing new device + * name assignments, etc., if the target returns. + */ +static unsigned int fc_remove_on_dev_loss = 0; +module_param_named(remove_on_dev_loss, fc_remove_on_dev_loss, + int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(remove_on_dev_loss, + "Boolean. When the device loss timer fires, this variable" + " controls whether the scsi infrastructure for the target" + " device is removed. Values: zero means do not remove," + " non-zero means remove.
Default is zero."); + +/** * Netlink Infrastructure */ @@ -648,11 +668,22 @@ return error; error = transport_class_register(&fc_vport_class); if (error) - return error; + goto unreg_host_class; error = transport_class_register(&fc_rport_class); if (error) - return error; - return transport_class_register(&fc_transport_class); + goto unreg_vport_class; + error = transport_class_register(&fc_transport_class); + if (error) + goto unreg_rport_class; + return 0; + +unreg_rport_class: + transport_class_unregister(&fc_rport_class); +unreg_vport_class: + transport_class_unregister(&fc_vport_class); +unreg_host_class: + transport_class_unregister(&fc_host_class); + return error; } static void __exit fc_transport_exit(void) @@ -2367,7 +2398,8 @@ container_of(work, struct fc_rport, stgt_delete_work); fc_terminate_rport_io(rport); - scsi_remove_target(&rport->dev); + if (fc_remove_on_dev_loss) + scsi_remove_target(&rport->dev); } @@ -3015,9 +3047,13 @@ return; } - dev_printk(KERN_ERR, &rport->dev, - "blocked FC remote port time out: removing target and " - "saving binding\n"); + if (fc_remove_on_dev_loss) + dev_printk(KERN_ERR, &rport->dev, + "blocked FC remote port time out: removing target and " + "saving binding\n"); + else + dev_printk(KERN_ERR, &rport->dev, + "blocked FC remote port time out: saving binding\n"); list_move_tail(&rport->peers, &fc_host->rport_bindings); --- linux-ec2-2.6.31.orig/drivers/scsi/scsi.c +++ linux-ec2-2.6.31/drivers/scsi/scsi.c @@ -241,10 +241,7 @@ */ struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask) { - struct scsi_cmnd *cmd; - unsigned char *buf; - - cmd = scsi_host_alloc_command(shost, gfp_mask); + struct scsi_cmnd *cmd = scsi_host_alloc_command(shost, gfp_mask); if (unlikely(!cmd)) { unsigned long flags; @@ -258,9 +255,15 @@ spin_unlock_irqrestore(&shost->free_list_lock, flags); if (cmd) { + void *buf, *prot; + buf = cmd->sense_buffer; + prot = cmd->prot_sdb; + memset(cmd, 0, sizeof(*cmd)); + cmd->sense_buffer = buf; + cmd->prot_sdb = prot; } } --- linux-ec2-2.6.31.orig/drivers/scsi/scsi_lib_dma.c +++ linux-ec2-2.6.31/drivers/scsi/scsi_lib_dma.c @@ -23,7 +23,7 @@ int nseg = 0; if (scsi_sg_count(cmd)) { - struct device *dev = cmd->device->host->shost_gendev.parent; + struct device *dev = cmd->device->host->dma_dev; nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), cmd->sc_data_direction); @@ -41,7 +41,7 @@ void scsi_dma_unmap(struct scsi_cmnd *cmd) { if (scsi_sg_count(cmd)) { - struct device *dev = cmd->device->host->shost_gendev.parent; + struct device *dev = cmd->device->host->dma_dev; dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), cmd->sc_data_direction); --- linux-ec2-2.6.31.orig/drivers/scsi/scsi_error.c +++ linux-ec2-2.6.31/drivers/scsi/scsi_error.c @@ -721,6 +721,9 @@ case NEEDS_RETRY: case FAILED: break; + case ADD_TO_MLQUEUE: + rtn = NEEDS_RETRY; + break; default: rtn = FAILED; break; --- linux-ec2-2.6.31.orig/drivers/scsi/sd.c +++ linux-ec2-2.6.31/drivers/scsi/sd.c @@ -2021,6 +2021,7 @@ sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", sdp->removable ? 
"removable " : ""); + put_device(&sdkp->dev); } /** @@ -2106,6 +2107,7 @@ get_device(&sdp->sdev_gendev); + get_device(&sdkp->dev); /* prevent release before async_schedule */ async_schedule(sd_probe_async, sdkp); return 0; --- linux-ec2-2.6.31.orig/drivers/scsi/sg.c +++ linux-ec2-2.6.31/drivers/scsi/sg.c @@ -1708,11 +1708,6 @@ Sg_scatter_hold *req_schp = &srp->data; SCSI_LOG_TIMEOUT(4, printk("sg_finish_rem_req: res_used=%d\n", (int) srp->res_used)); - if (srp->res_used) - sg_unlink_reserve(sfp, srp); - else - sg_remove_scat(req_schp); - if (srp->rq) { if (srp->bio) ret = blk_rq_unmap_user(srp->bio); @@ -1720,6 +1715,11 @@ blk_put_request(srp->rq); } + if (srp->res_used) + sg_unlink_reserve(sfp, srp); + else + sg_remove_scat(req_schp); + sg_remove_request(sfp, srp); return ret; @@ -1811,7 +1811,7 @@ return 0; out: for (i = 0; i < k; i++) - __free_pages(schp->pages[k], order); + __free_pages(schp->pages[i], order); if (--order >= 0) goto retry; --- linux-ec2-2.6.31.orig/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ linux-ec2-2.6.31/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -140,11 +140,18 @@ u32 device_info; u32 ioc_status; + if (ioc->shost_recovery) { + printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", + __func__, ioc->name); + return -EFAULT; + } + if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle))) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", + ioc->name, __FILE__, __LINE__, __func__); - return -1; + return -ENXIO; } ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & @@ -153,7 +160,7 @@ printk(MPT2SAS_ERR_FMT "handle(0x%04x), ioc_status(0x%04x)" "\nfailure at %s:%d/%s()!\n", ioc->name, handle, ioc_status, __FILE__, __LINE__, __func__); - return -1; + return -EIO; } memset(identify, 0, sizeof(identify)); @@ -288,21 +295,17 @@ void *psge; u32 sgl_flags; u8 issue_reset = 0; - unsigned long flags; void *data_out = NULL; dma_addr_t data_out_dma; u32 sz; u64 *sas_address_le; u16 wait_state_count; - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->ioc_reset_in_progress) { - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); + if (ioc->shost_recovery) { printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", __func__, ioc->name); return -EFAULT; } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); mutex_lock(&ioc->transport_cmds.mutex); @@ -806,6 +809,12 @@ struct _sas_node *sas_node; struct _sas_phy *mpt2sas_phy; + if (ioc->shost_recovery) { + printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", + __func__, ioc->name); + return; + } + spin_lock_irqsave(&ioc->sas_node_lock, flags); sas_node = _transport_sas_node_find_by_handle(ioc, handle); spin_unlock_irqrestore(&ioc->sas_node_lock, flags); @@ -1025,7 +1034,6 @@ void *psge; u32 sgl_flags; u8 issue_reset = 0; - unsigned long flags; dma_addr_t dma_addr_in = 0; dma_addr_t dma_addr_out = 0; u16 wait_state_count; @@ -1045,14 +1053,11 @@ return -EINVAL; } - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->ioc_reset_in_progress) { - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); + if (ioc->shost_recovery) { printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", __func__, ioc->name); return -EFAULT; } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); rc = mutex_lock_interruptible(&ioc->transport_cmds.mutex); if (rc) --- linux-ec2-2.6.31.orig/drivers/scsi/mpt2sas/mpt2sas_base.c +++ linux-ec2-2.6.31/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -94,7 +94,7 @@ int rc; 
spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->ioc_reset_in_progress) + if (ioc->shost_recovery) goto rearm_timer; spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); @@ -1542,6 +1542,8 @@ (ioc->bios_pg3.BiosVersion & 0x0000FF00) >> 8, ioc->bios_pg3.BiosVersion & 0x000000FF); + _base_display_dell_branding(ioc); + printk(MPT2SAS_INFO_FMT "Protocol=(", ioc->name); if (ioc->facts.ProtocolFlags & MPI2_IOCFACTS_PROTOCOL_SCSI_INITIATOR) { @@ -1554,8 +1556,6 @@ i++; } - _base_display_dell_branding(ioc); - i = 0; printk("), "); printk("Capabilities=("); @@ -1627,6 +1627,9 @@ u32 iounit_pg1_flags; mpt2sas_config_get_manufacturing_pg0(ioc, &mpi_reply, &ioc->manu_pg0); + if (ioc->ir_firmware) + mpt2sas_config_get_manufacturing_pg10(ioc, &mpi_reply, + &ioc->manu_pg10); mpt2sas_config_get_bios_pg2(ioc, &mpi_reply, &ioc->bios_pg2); mpt2sas_config_get_bios_pg3(ioc, &mpi_reply, &ioc->bios_pg3); mpt2sas_config_get_ioc_pg8(ioc, &mpi_reply, &ioc->ioc_pg8); @@ -3501,20 +3504,13 @@ __func__)); spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->ioc_reset_in_progress) { + if (ioc->shost_recovery) { spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); printk(MPT2SAS_ERR_FMT "%s: busy\n", ioc->name, __func__); return -EBUSY; } - ioc->ioc_reset_in_progress = 1; ioc->shost_recovery = 1; - if (ioc->shost->shost_state == SHOST_RUNNING) { - /* set back to SHOST_RUNNING in mpt2sas_scsih.c */ - scsi_host_set_state(ioc->shost, SHOST_RECOVERY); - printk(MPT2SAS_INFO_FMT "putting controller into " - "SHOST_RECOVERY\n", ioc->name); - } spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); _base_reset_handler(ioc, MPT2_IOC_PRE_RESET); @@ -3534,7 +3530,10 @@ ioc->name, __func__, ((r == 0) ? "SUCCESS" : "FAILED"))); spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - ioc->ioc_reset_in_progress = 0; + ioc->shost_recovery = 0; spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); + + if (!r) + _base_reset_handler(ioc, MPT2_IOC_RUNNING); return r; } --- linux-ec2-2.6.31.orig/drivers/scsi/mpt2sas/mpt2sas_base.h +++ linux-ec2-2.6.31/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -119,6 +119,7 @@ #define MPT2_IOC_PRE_RESET 1 /* prior to host reset */ #define MPT2_IOC_AFTER_RESET 2 /* just after host reset */ #define MPT2_IOC_DONE_RESET 3 /* links re-initialized */ +#define MPT2_IOC_RUNNING 4 /* shost running */ /* * logging format @@ -196,6 +197,38 @@ * @block: device is in SDEV_BLOCK state * @tlr_snoop_check: flag used in determining whether to disable TLR */ + +/* OEM Identifiers */ +#define MFG10_OEM_ID_INVALID (0x00000000) +#define MFG10_OEM_ID_DELL (0x00000001) +#define MFG10_OEM_ID_FSC (0x00000002) +#define MFG10_OEM_ID_SUN (0x00000003) +#define MFG10_OEM_ID_IBM (0x00000004) + +/* GENERIC Flags 0*/ +#define MFG10_GF0_OCE_DISABLED (0x00000001) +#define MFG10_GF0_R1E_DRIVE_COUNT (0x00000002) +#define MFG10_GF0_R10_DISPLAY (0x00000004) +#define MFG10_GF0_SSD_DATA_SCRUB_DISABLE (0x00000008) +#define MFG10_GF0_SINGLE_DRIVE_R0 (0x00000010) + +/* OEM Specific Flags will come from OEM specific header files */ +typedef struct _MPI2_CONFIG_PAGE_MAN_10 { + MPI2_CONFIG_PAGE_HEADER Header; /* 00h */ + U8 OEMIdentifier; /* 04h */ + U8 Reserved1; /* 05h */ + U16 Reserved2; /* 08h */ + U32 Reserved3; /* 0Ch */ + U32 GenericFlags0; /* 10h */ + U32 GenericFlags1; /* 14h */ + U32 Reserved4; /* 18h */ + U32 OEMSpecificFlags0; /* 1Ch */ + U32 OEMSpecificFlags1; /* 20h */ + U32 Reserved5[18]; /* 24h-60h*/ +} MPI2_CONFIG_PAGE_MAN_10, + MPI2_POINTER 
PTR_MPI2_CONFIG_PAGE_MAN_10, + Mpi2ManufacturingPage10_t, MPI2_POINTER pMpi2ManufacturingPage10_t; + struct MPT2SAS_DEVICE { struct MPT2SAS_TARGET *sas_target; unsigned int lun; @@ -431,7 +464,7 @@ * @fw_event_list: list of fw events * @aen_event_read_flag: event log was read * @broadcast_aen_busy: broadcast aen waiting to be serviced - * @ioc_reset_in_progress: host reset in progress + * @shost_recovery: host reset in progress * @ioc_reset_in_progress_lock: * @ioc_link_reset_in_progress: phy/hard reset in progress * @ignore_loginfos: ignore loginfos during task managment @@ -460,6 +493,7 @@ * @facts: static facts data * @pfacts: static port facts data * @manu_pg0: static manufacturing page 0 + * @manu_pg10: static manufacturing page 10 * @bios_pg2: static bios page 2 * @bios_pg3: static bios page 3 * @ioc_pg8: static ioc page 8 @@ -544,7 +578,6 @@ /* misc flags */ int aen_event_read_flag; u8 broadcast_aen_busy; - u8 ioc_reset_in_progress; u8 shost_recovery; spinlock_t ioc_reset_in_progress_lock; u8 ioc_link_reset_in_progress; @@ -663,6 +696,7 @@ dma_addr_t diag_buffer_dma[MPI2_DIAG_BUF_TYPE_COUNT]; u8 diag_buffer_status[MPI2_DIAG_BUF_TYPE_COUNT]; u32 unique_id[MPI2_DIAG_BUF_TYPE_COUNT]; + Mpi2ManufacturingPage10_t manu_pg10; u32 product_specific[MPI2_DIAG_BUF_TYPE_COUNT][23]; u32 diagnostic_flags[MPI2_DIAG_BUF_TYPE_COUNT]; }; @@ -734,6 +768,8 @@ int mpt2sas_config_get_number_hba_phys(struct MPT2SAS_ADAPTER *ioc, u8 *num_phys); int mpt2sas_config_get_manufacturing_pg0(struct MPT2SAS_ADAPTER *ioc, Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage0_t *config_page); +int mpt2sas_config_get_manufacturing_pg10(struct MPT2SAS_ADAPTER *ioc, + Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage10_t *config_page); int mpt2sas_config_get_bios_pg2(struct MPT2SAS_ADAPTER *ioc, Mpi2ConfigReply_t *mpi_reply, Mpi2BiosPage2_t *config_page); int mpt2sas_config_get_bios_pg3(struct MPT2SAS_ADAPTER *ioc, Mpi2ConfigReply_t @@ -776,7 +812,6 @@ u16 *volume_handle); int mpt2sas_config_get_volume_wwid(struct MPT2SAS_ADAPTER *ioc, u16 volume_handle, u64 *wwid); - /* ctl shared API */ extern struct device_attribute *mpt2sas_host_attrs[]; extern struct device_attribute *mpt2sas_dev_attrs[]; @@ -802,5 +837,7 @@ u16 attached_handle, u8 phy_number, u8 link_rate); extern struct sas_function_template mpt2sas_transport_functions; extern struct scsi_transport_template *mpt2sas_transport_template; +extern int scsi_internal_device_block(struct scsi_device *sdev); +extern int scsi_internal_device_unblock(struct scsi_device *sdev); #endif /* MPT2SAS_BASE_H_INCLUDED */ --- linux-ec2-2.6.31.orig/drivers/scsi/mpt2sas/mpt2sas_config.c +++ linux-ec2-2.6.31/drivers/scsi/mpt2sas/mpt2sas_config.c @@ -426,6 +426,67 @@ } /** + * mpt2sas_config_get_manufacturing_pg10 - obtain manufacturing page 10 + * @ioc: per adapter object + * @mpi_reply: reply mf payload returned from firmware + * @config_page: contents of the config page + * Context: sleep. + * + * Returns 0 for success, non-zero for failure. 
+ */ +int +mpt2sas_config_get_manufacturing_pg10(struct MPT2SAS_ADAPTER *ioc, + Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage10_t *config_page) +{ + Mpi2ConfigRequest_t mpi_request; + int r; + struct config_request mem; + + memset(config_page, 0, sizeof(Mpi2ManufacturingPage10_t)); + memset(&mpi_request, 0, sizeof(Mpi2ConfigRequest_t)); + mpi_request.Function = MPI2_FUNCTION_CONFIG; + mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_HEADER; + mpi_request.Header.PageType = MPI2_CONFIG_PAGETYPE_MANUFACTURING; + mpi_request.Header.PageNumber = 10; + mpi_request.Header.PageVersion = MPI2_MANUFACTURING0_PAGEVERSION; + mpt2sas_base_build_zero_len_sge(ioc, &mpi_request.PageBufferSGE); + r = _config_request(ioc, &mpi_request, mpi_reply, + MPT2_CONFIG_PAGE_DEFAULT_TIMEOUT); + if (r) + goto out; + + mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT; + mpi_request.Header.PageVersion = mpi_reply->Header.PageVersion; + mpi_request.Header.PageNumber = mpi_reply->Header.PageNumber; + mpi_request.Header.PageType = mpi_reply->Header.PageType; + mpi_request.Header.PageLength = mpi_reply->Header.PageLength; + mem.config_page_sz = le16_to_cpu(mpi_reply->Header.PageLength) * 4; + if (mem.config_page_sz > ioc->config_page_sz) { + r = _config_alloc_config_dma_memory(ioc, &mem); + if (r) + goto out; + } else { + mem.config_page_dma = ioc->config_page_dma; + mem.config_page = ioc->config_page; + } + ioc->base_add_sg_single(&mpi_request.PageBufferSGE, + MPT2_CONFIG_COMMON_SGLFLAGS | mem.config_page_sz, + mem.config_page_dma); + r = _config_request(ioc, &mpi_request, mpi_reply, + MPT2_CONFIG_PAGE_DEFAULT_TIMEOUT); + if (!r) + memcpy(config_page, mem.config_page, + min_t(u16, mem.config_page_sz, + sizeof(Mpi2ManufacturingPage10_t))); + + if (mem.config_page_sz > ioc->config_page_sz) + _config_free_config_dma_memory(ioc, &mem); + + out: + return r; +} + +/** * mpt2sas_config_get_bios_pg2 - obtain bios page 2 * @ioc: per adapter object * @mpi_reply: reply mf payload returned from firmware --- linux-ec2-2.6.31.orig/drivers/scsi/mpt2sas/mpt2sas_ctl.c +++ linux-ec2-2.6.31/drivers/scsi/mpt2sas/mpt2sas_ctl.c @@ -1963,7 +1963,6 @@ { enum block_state state; long ret = -EINVAL; - unsigned long flags; state = (file->f_flags & O_NONBLOCK) ? 
NON_BLOCKING : BLOCKING; @@ -1989,13 +1988,8 @@ !ioc) return -ENODEV; - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->shost_recovery) { - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, - flags); + if (ioc->shost_recovery) return -EAGAIN; - } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); if (_IOC_SIZE(cmd) == sizeof(struct mpt2_ioctl_command)) { uarg = arg; @@ -2098,7 +2092,6 @@ struct mpt2_ioctl_command karg; struct MPT2SAS_ADAPTER *ioc; enum block_state state; - unsigned long flags; if (_IOC_SIZE(cmd) != sizeof(struct mpt2_ioctl_command32)) return -EINVAL; @@ -2113,13 +2106,8 @@ if (_ctl_verify_adapter(karg32.hdr.ioc_number, &ioc) == -1 || !ioc) return -ENODEV; - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->shost_recovery) { - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, - flags); + if (ioc->shost_recovery) return -EAGAIN; - } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); memset(&karg, 0, sizeof(struct mpt2_ioctl_command)); karg.hdr.ioc_number = karg32.hdr.ioc_number; --- linux-ec2-2.6.31.orig/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ linux-ec2-2.6.31/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -103,7 +103,6 @@ }; -#define MPT2SAS_RESCAN_AFTER_HOST_RESET (0xFFFF) /** * struct fw_event_work - firmware event struct * @list: link list framework @@ -1502,7 +1501,13 @@ break; case MPI2_RAID_VOL_TYPE_RAID1E: qdepth = MPT2SAS_RAID_QUEUE_DEPTH; - r_level = "RAID1E"; + if (ioc->manu_pg10.OEMIdentifier && + (ioc->manu_pg10.GenericFlags0 & + MFG10_GF0_R10_DISPLAY) && + !(raid_device->num_pds % 2)) + r_level = "RAID10"; + else + r_level = "RAID1E"; break; case MPI2_RAID_VOL_TYPE_RAID1: qdepth = MPT2SAS_RAID_QUEUE_DEPTH; @@ -1786,17 +1791,18 @@ u32 ioc_state; unsigned long timeleft; u8 VF_ID = 0; - unsigned long flags; - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->tm_cmds.status != MPT2_CMD_NOT_USED || - ioc->shost_recovery) { - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); + if (ioc->tm_cmds.status != MPT2_CMD_NOT_USED) { + printk(MPT2SAS_INFO_FMT "%s: tm_cmd busy!!!\n", + __func__, ioc->name); + return; + } + + if (ioc->shost_recovery) { printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", __func__, ioc->name); return; } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); ioc_state = mpt2sas_base_get_iocstate(ioc, 0); if (ioc_state & MPI2_DOORBELL_USED) { @@ -2222,7 +2228,7 @@ MPT2SAS_INFO_FMT "SDEV_RUNNING: " "handle(0x%04x)\n", ioc->name, handle)); sas_device_priv_data->block = 0; - scsi_device_set_state(sdev, SDEV_RUNNING); + scsi_internal_device_unblock(sdev); } } } @@ -2251,7 +2257,7 @@ MPT2SAS_INFO_FMT "SDEV_BLOCK: " "handle(0x%04x)\n", ioc->name, handle)); sas_device_priv_data->block = 1; - scsi_device_set_state(sdev, SDEV_BLOCK); + scsi_internal_device_block(sdev); } } } @@ -2327,6 +2333,7 @@ u16 handle; u16 reason_code; u8 phy_number; + u8 link_rate; for (i = 0; i < event_data->NumEntries; i++) { handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle); @@ -2337,6 +2344,11 @@ MPI2_EVENT_SAS_TOPO_RC_MASK; if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING) _scsih_block_io_device(ioc, handle); + if (reason_code == MPI2_EVENT_SAS_TOPO_RC_PHY_CHANGED) { + link_rate = event_data->PHY[i].LinkRate >> 4; + if (link_rate >= MPI2_SAS_NEG_LINK_RATE_1_5) + _scsih_ublock_io_device(ioc, handle); + } } } @@ -2405,27 +2417,6 @@ } /** - * _scsih_queue_rescan - queue a topology rescan from user context - * @ioc: per 
adapter object - * - * Return nothing. - */ -static void -_scsih_queue_rescan(struct MPT2SAS_ADAPTER *ioc) -{ - struct fw_event_work *fw_event; - - if (ioc->wait_for_port_enable_to_complete) - return; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); - if (!fw_event) - return; - fw_event->event = MPT2SAS_RESCAN_AFTER_HOST_RESET; - fw_event->ioc = ioc; - _scsih_fw_event_add(ioc, fw_event); -} - -/** * _scsih_flush_running_cmds - completing outstanding commands. * @ioc: per adapter object * @@ -2456,46 +2447,6 @@ } /** - * mpt2sas_scsih_reset_handler - reset callback handler (for scsih) - * @ioc: per adapter object - * @reset_phase: phase - * - * The handler for doing any required cleanup or initialization. - * - * The reset phase can be MPT2_IOC_PRE_RESET, MPT2_IOC_AFTER_RESET, - * MPT2_IOC_DONE_RESET - * - * Return nothing. - */ -void -mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase) -{ - switch (reset_phase) { - case MPT2_IOC_PRE_RESET: - dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: " - "MPT2_IOC_PRE_RESET\n", ioc->name, __func__)); - _scsih_fw_event_off(ioc); - break; - case MPT2_IOC_AFTER_RESET: - dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: " - "MPT2_IOC_AFTER_RESET\n", ioc->name, __func__)); - if (ioc->tm_cmds.status & MPT2_CMD_PENDING) { - ioc->tm_cmds.status |= MPT2_CMD_RESET; - mpt2sas_base_free_smid(ioc, ioc->tm_cmds.smid); - complete(&ioc->tm_cmds.done); - } - _scsih_fw_event_on(ioc); - _scsih_flush_running_cmds(ioc); - break; - case MPT2_IOC_DONE_RESET: - dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: " - "MPT2_IOC_DONE_RESET\n", ioc->name, __func__)); - _scsih_queue_rescan(ioc); - break; - } -} - -/** * _scsih_setup_eedp - setup MPI request for EEDP transfer * @scmd: pointer to scsi command object * @mpi_request: pointer to the SCSI_IO reqest message frame @@ -2615,7 +2566,6 @@ Mpi2SCSIIORequest_t *mpi_request; u32 mpi_control; u16 smid; - unsigned long flags; scmd->scsi_done = done; sas_device_priv_data = scmd->device->hostdata; @@ -2634,13 +2584,10 @@ } /* see if we are busy with task managment stuff */ - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (sas_target_priv_data->tm_busy || - ioc->shost_recovery || ioc->ioc_link_reset_in_progress) { - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); + if (sas_target_priv_data->tm_busy) + return SCSI_MLQUEUE_DEVICE_BUSY; + else if (ioc->shost_recovery || ioc->ioc_link_reset_in_progress) return SCSI_MLQUEUE_HOST_BUSY; - } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); if (scmd->sc_data_direction == DMA_FROM_DEVICE) mpi_control = MPI2_SCSIIO_CONTROL_READ; @@ -3436,6 +3383,9 @@ if (!handle) return -1; + if (ioc->shost_recovery) + return -1; + if ((mpt2sas_config_get_expander_pg0(ioc, &mpi_reply, &expander_pg0, MPI2_SAS_EXPAND_PGAD_FORM_HNDL, handle))) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", @@ -3572,6 +3522,9 @@ struct _sas_node *sas_expander; unsigned long flags; + if (ioc->shost_recovery) + return; + spin_lock_irqsave(&ioc->sas_node_lock, flags); sas_expander = mpt2sas_scsih_expander_find_by_handle(ioc, handle); spin_unlock_irqrestore(&ioc->sas_node_lock, flags); @@ -3743,6 +3696,8 @@ mutex_unlock(&ioc->tm_cmds.mutex); dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "issue target reset " "done: handle(0x%04x)\n", ioc->name, device_handle)); + if (ioc->shost_recovery) + goto out; } /* SAS_IO_UNIT_CNTR - send REMOVE_DEVICE */ @@ -3765,6 +3720,9 @@ le32_to_cpu(mpi_reply.IOCLogInfo))); out: + + _scsih_ublock_io_device(ioc, handle); + 
mpt2sas_transport_port_remove(ioc, sas_device->sas_address, sas_device->parent_handle); @@ -3908,6 +3866,8 @@ "expander event\n", ioc->name)); return; } + if (ioc->shost_recovery) + return; if (event_data->PHY[i].PhyStatus & MPI2_EVENT_SAS_TOPO_PHYSTATUS_VACANT) continue; @@ -3942,10 +3902,6 @@ link_rate_); } } - if (reason_code == MPI2_EVENT_SAS_TOPO_RC_PHY_CHANGED) { - if (link_rate_ >= MPI2_SAS_NEG_LINK_RATE_1_5) - _scsih_ublock_io_device(ioc, handle); - } if (reason_code == MPI2_EVENT_SAS_TOPO_RC_TARG_ADDED) { if (link_rate_ < MPI2_SAS_NEG_LINK_RATE_1_5) break; @@ -5156,22 +5112,9 @@ _scsih_remove_unresponding_devices(struct MPT2SAS_ADAPTER *ioc) { struct _sas_device *sas_device, *sas_device_next; - struct _sas_node *sas_expander, *sas_expander_next; + struct _sas_node *sas_expander; struct _raid_device *raid_device, *raid_device_next; - unsigned long flags; - _scsih_search_responding_sas_devices(ioc); - _scsih_search_responding_raid_devices(ioc); - _scsih_search_responding_expanders(ioc); - - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - ioc->shost_recovery = 0; - if (ioc->shost->shost_state == SHOST_RECOVERY) { - printk(MPT2SAS_INFO_FMT "putting controller into " - "SHOST_RUNNING\n", ioc->name); - scsi_host_set_state(ioc->shost, SHOST_RUNNING); - } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); list_for_each_entry_safe(sas_device, sas_device_next, &ioc->sas_device_list, list) { @@ -5207,16 +5150,63 @@ _scsih_raid_device_remove(ioc, raid_device); } - list_for_each_entry_safe(sas_expander, sas_expander_next, - &ioc->sas_expander_list, list) { + retry_expander_search: + sas_expander = NULL; + list_for_each_entry(sas_expander, &ioc->sas_expander_list, list) { if (sas_expander->responding) { sas_expander->responding = 0; continue; } - printk("\tremoving expander: handle(0x%04x), " - " sas_addr(0x%016llx)\n", sas_expander->handle, - (unsigned long long)sas_expander->sas_address); _scsih_expander_remove(ioc, sas_expander->handle); + goto retry_expander_search; + } +} + +/** + * mpt2sas_scsih_reset_handler - reset callback handler (for scsih) + * @ioc: per adapter object + * @reset_phase: phase + * + * The handler for doing any required cleanup or initialization. + * + * The reset phase can be MPT2_IOC_PRE_RESET, MPT2_IOC_AFTER_RESET, + * MPT2_IOC_DONE_RESET + * + * Return nothing. 
+ */ +void +mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase) +{ + switch (reset_phase) { + case MPT2_IOC_PRE_RESET: + dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: " + "MPT2_IOC_PRE_RESET\n", ioc->name, __func__)); + _scsih_fw_event_off(ioc); + break; + case MPT2_IOC_AFTER_RESET: + dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: " + "MPT2_IOC_AFTER_RESET\n", ioc->name, __func__)); + if (ioc->tm_cmds.status & MPT2_CMD_PENDING) { + ioc->tm_cmds.status |= MPT2_CMD_RESET; + mpt2sas_base_free_smid(ioc, ioc->tm_cmds.smid); + complete(&ioc->tm_cmds.done); + } + _scsih_fw_event_on(ioc); + _scsih_flush_running_cmds(ioc); + break; + case MPT2_IOC_DONE_RESET: + dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: " + "MPT2_IOC_DONE_RESET\n", ioc->name, __func__)); + _scsih_sas_host_refresh(ioc, 0); + _scsih_search_responding_sas_devices(ioc); + _scsih_search_responding_raid_devices(ioc); + _scsih_search_responding_expanders(ioc); + break; + case MPT2_IOC_RUNNING: + dtmprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: " + "MPT2_IOC_RUNNING\n", ioc->name, __func__)); + _scsih_remove_unresponding_devices(ioc); + break; } } @@ -5236,14 +5226,6 @@ unsigned long flags; struct MPT2SAS_ADAPTER *ioc = fw_event->ioc; - /* This is invoked by calling _scsih_queue_rescan(). */ - if (fw_event->event == MPT2SAS_RESCAN_AFTER_HOST_RESET) { - _scsih_fw_event_free(ioc, fw_event); - _scsih_sas_host_refresh(ioc, 1); - _scsih_remove_unresponding_devices(ioc); - return; - } - /* the queue is being flushed so ignore this event */ spin_lock_irqsave(&ioc->fw_event_lock, flags); if (ioc->fw_events_off || ioc->remove_host) { @@ -5253,13 +5235,10 @@ } spin_unlock_irqrestore(&ioc->fw_event_lock, flags); - spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); if (ioc->shost_recovery) { - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); _scsih_fw_event_requeue(ioc, fw_event, 1000); return; } - spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); switch (fw_event->event) { case MPI2_EVENT_SAS_TOPOLOGY_CHANGE_LIST: @@ -5461,6 +5440,8 @@ if (!sas_device) continue; _scsih_remove_device(ioc, sas_device->handle); + if (ioc->shost_recovery) + return; goto retry_device_search; } } @@ -5482,6 +5463,8 @@ if (!expander_sibling) continue; _scsih_expander_remove(ioc, expander_sibling->handle); + if (ioc->shost_recovery) + return; goto retry_expander_search; } } --- linux-ec2-2.6.31.orig/drivers/scsi/lpfc/lpfc_init.c +++ linux-ec2-2.6.31/drivers/scsi/lpfc/lpfc_init.c @@ -2384,7 +2384,7 @@ vport->els_tmofunc.function = lpfc_els_timeout; vport->els_tmofunc.data = (unsigned long)vport; - error = scsi_add_host(shost, dev); + error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) goto out_put_shost; --- linux-ec2-2.6.31.orig/drivers/scsi/qla2xxx/qla_attr.c +++ linux-ec2-2.6.31/drivers/scsi/qla2xxx/qla_attr.c @@ -1654,7 +1654,8 @@ fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN); } - if (scsi_add_host(vha->host, &fc_vport->dev)) { + if (scsi_add_host_with_dma(vha->host, &fc_vport->dev, + &ha->pdev->dev)) { DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n", vha->host_no, vha->vp_idx)); goto vport_create_failed_2; --- linux-ec2-2.6.31.orig/drivers/scsi/megaraid/megaraid_sas.c +++ linux-ec2-2.6.31/drivers/scsi/megaraid/megaraid_sas.c @@ -3032,7 +3032,7 @@ int error = 0, i; void *sense = NULL; dma_addr_t sense_handle; - u32 *sense_ptr; + unsigned long *sense_ptr; memset(kbuff_arr, 0, sizeof(kbuff_arr)); @@ -3109,7 +3109,7 @@ } sense_ptr = - (u32 *) ((unsigned long)cmd->frame + 
ioc->sense_off); + (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off); *sense_ptr = sense_handle; } @@ -3140,8 +3140,8 @@ * sense_ptr points to the location that has the user * sense buffer address */ - sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw + - ioc->sense_off); + sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw + + ioc->sense_off); if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)), sense, ioc->sense_len)) { @@ -3451,7 +3451,7 @@ return retval; } -static DRIVER_ATTR(poll_mode_io, S_IRUGO|S_IWUGO, +static DRIVER_ATTR(poll_mode_io, S_IRUGO|S_IWUSR, megasas_sysfs_show_poll_mode_io, megasas_sysfs_set_poll_mode_io); --- linux-ec2-2.6.31.orig/drivers/net/e100.c +++ linux-ec2-2.6.31/drivers/net/e100.c @@ -156,6 +156,7 @@ #include #include #include +#include #include #include #include @@ -601,6 +602,7 @@ struct mem *mem; dma_addr_t dma_addr; + struct pci_pool *cbs_pool; dma_addr_t cbs_dma_addr; u8 adaptive_ifs; u8 tx_threshold; @@ -1779,9 +1781,7 @@ nic->cb_to_clean = nic->cb_to_clean->next; nic->cbs_avail++; } - pci_free_consistent(nic->pdev, - sizeof(struct cb) * nic->params.cbs.count, - nic->cbs, nic->cbs_dma_addr); + pci_pool_free(nic->cbs_pool, nic->cbs, nic->cbs_dma_addr); nic->cbs = NULL; nic->cbs_avail = 0; } @@ -1799,10 +1799,11 @@ nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL; nic->cbs_avail = 0; - nic->cbs = pci_alloc_consistent(nic->pdev, - sizeof(struct cb) * count, &nic->cbs_dma_addr); + nic->cbs = pci_pool_alloc(nic->cbs_pool, GFP_KERNEL, + &nic->cbs_dma_addr); if (!nic->cbs) return -ENOMEM; + memset(nic->cbs, 0, count * sizeof(struct cb)); for (cb = nic->cbs, i = 0; i < count; cb++, i++) { cb->next = (i + 1 < count) ? cb + 1 : nic->cbs; @@ -1811,7 +1812,6 @@ cb->dma_addr = nic->cbs_dma_addr + i * sizeof(struct cb); cb->link = cpu_to_le32(nic->cbs_dma_addr + ((i+1) % count) * sizeof(struct cb)); - cb->skb = NULL; } nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = nic->cbs; @@ -2827,7 +2827,11 @@ DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n"); goto err_out_free; } - + nic->cbs_pool = pci_pool_create(netdev->name, + nic->pdev, + nic->params.cbs.count * sizeof(struct cb), + sizeof(u32), + 0); DPRINTK(PROBE, INFO, "addr 0x%llx, irq %d, MAC addr %pM\n", (unsigned long long)pci_resource_start(pdev, use_io ? 
1 : 0), pdev->irq, netdev->dev_addr); @@ -2857,6 +2861,7 @@ unregister_netdev(netdev); e100_free(nic); pci_iounmap(pdev, nic->csr); + pci_pool_destroy(nic->cbs_pool); free_netdev(netdev); pci_release_regions(pdev); pci_disable_device(pdev); --- linux-ec2-2.6.31.orig/drivers/net/iseries_veth.c +++ linux-ec2-2.6.31/drivers/net/iseries_veth.c @@ -495,7 +495,7 @@ cnx->remote_lp); } else { memcpy(&cnx->cap_ack_event, event, - sizeof(&cnx->cap_ack_event)); + sizeof(cnx->cap_ack_event)); cnx->state |= VETH_STATE_GOTCAPACK; veth_kick_statemachine(cnx); } --- linux-ec2-2.6.31.orig/drivers/net/smsc9420.c +++ linux-ec2-2.6.31/drivers/net/smsc9420.c @@ -252,6 +252,9 @@ { struct smsc9420_pdata *pd = netdev_priv(dev); + if (!pd->phy_dev) + return -ENODEV; + cmd->maxtxpkt = 1; cmd->maxrxpkt = 1; return phy_ethtool_gset(pd->phy_dev, cmd); @@ -262,6 +265,9 @@ { struct smsc9420_pdata *pd = netdev_priv(dev); + if (!pd->phy_dev) + return -ENODEV; + return phy_ethtool_sset(pd->phy_dev, cmd); } @@ -290,6 +296,10 @@ static int smsc9420_ethtool_nway_reset(struct net_device *netdev) { struct smsc9420_pdata *pd = netdev_priv(netdev); + + if (!pd->phy_dev) + return -ENODEV; + return phy_start_aneg(pd->phy_dev); } @@ -312,6 +322,10 @@ for (i = 0; i < 0x100; i += (sizeof(u32))) data[j++] = smsc9420_reg_read(pd, i); + // cannot read phy registers if the net device is down + if (!phy_dev) + return; + for (i = 0; i <= 31; i++) data[j++] = smsc9420_mii_read(phy_dev->bus, phy_dev->addr, i); } --- linux-ec2-2.6.31.orig/drivers/net/tg3.c +++ linux-ec2-2.6.31/drivers/net/tg3.c @@ -223,7 +223,7 @@ {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57780)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57760)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57790)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57720)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57788)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)}, {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)}, @@ -11485,6 +11485,9 @@ else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 && tp->pdev->device == TG3PCI_DEVICE_TIGON3_57790) strcpy(tp->board_part_number, "BCM57790"); + else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 && + tp->pdev->device == TG3PCI_DEVICE_TIGON3_57788) + strcpy(tp->board_part_number, "BCM57788"); else strcpy(tp->board_part_number, "none"); } --- linux-ec2-2.6.31.orig/drivers/net/au1000_eth.c +++ linux-ec2-2.6.31/drivers/net/au1000_eth.c @@ -1089,7 +1089,14 @@ return NULL; } - if ((err = register_netdev(dev)) != 0) { + dev->base_addr = base; + dev->irq = irq; + dev->netdev_ops = &au1000_netdev_ops; + SET_ETHTOOL_OPS(dev, &au1000_ethtool_ops); + dev->watchdog_timeo = ETH_TX_TIMEOUT; + + err = register_netdev(dev); + if (err != 0) { printk(KERN_ERR "%s: Cannot register net device, error %d\n", DRV_NAME, err); free_netdev(dev); @@ -1207,12 +1214,6 @@ aup->tx_db_inuse[i] = pDB; } - dev->base_addr = base; - dev->irq = irq; - dev->netdev_ops = &au1000_netdev_ops; - SET_ETHTOOL_OPS(dev, &au1000_ethtool_ops); - dev->watchdog_timeo = ETH_TX_TIMEOUT; - /* * The boot code uses the ethernet controller, so reset it to start * fresh. au1000_init() expects that the device is in reset state. 
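The au1000_eth hunks above move every net_device field assignment (base_addr, irq, netdev_ops, the ethtool ops, watchdog timeout) in front of register_netdev(): registration publishes the device, so any callback pointer still unset at that point can be dereferenced before probe finishes. A small user-space sketch of the same publish-last rule; the toy types and register_dev() merely stand in for the kernel structures and register_netdev(), nothing below is kernel API.

#include <stdio.h>

struct ops { void (*poll)(void); };
struct dev { int irq; const struct ops *ops; };

static void my_poll(void) { puts("polling"); }
static const struct ops my_ops = { .poll = my_poll };

/* stand-in for register_netdev(): after this the device is live and
 * its callbacks may run at any time */
static int register_dev(struct dev *d)
{
	if (!d->ops || !d->ops->poll)
		return -1;	/* would have crashed later; reject instead */
	d->ops->poll();		/* registration may invoke ops immediately */
	return 0;
}

int main(void)
{
	struct dev d = { 0 };

	/* fill in everything the callbacks rely on *before* publishing */
	d.irq = 42;
	d.ops = &my_ops;

	return register_dev(&d) ? 1 : 0;
}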
--- linux-ec2-2.6.31.orig/drivers/net/xen-netfront.c +++ linux-ec2-2.6.31/drivers/net/xen-netfront.c @@ -36,8 +36,6 @@ #include #include #include -#include -#include #include #include #include @@ -768,45 +766,6 @@ return cons; } -static int skb_checksum_setup(struct sk_buff *skb) -{ - struct iphdr *iph; - unsigned char *th; - int err = -EPROTO; - - if (skb->protocol != htons(ETH_P_IP)) - goto out; - - iph = (void *)skb->data; - th = skb->data + 4 * iph->ihl; - if (th >= skb_tail_pointer(skb)) - goto out; - - skb->csum_start = th - skb->head; - switch (iph->protocol) { - case IPPROTO_TCP: - skb->csum_offset = offsetof(struct tcphdr, check); - break; - case IPPROTO_UDP: - skb->csum_offset = offsetof(struct udphdr, check); - break; - default: - if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" - "TCP/UDP packet, dropping a protocol" - " %d packet", iph->protocol); - goto out; - } - - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) - goto out; - - err = 0; - -out: - return err; -} - static int handle_incoming_queue(struct net_device *dev, struct sk_buff_head *rxq) { @@ -1793,7 +1752,6 @@ static struct xenbus_driver netfront_driver = { .name = "vif", - .owner = THIS_MODULE, .ids = netfront_ids, .probe = netfront_probe, .remove = __devexit_p(xennet_remove), --- linux-ec2-2.6.31.orig/drivers/net/tg3.h +++ linux-ec2-2.6.31/drivers/net/tg3.h @@ -43,7 +43,7 @@ #define TG3PCI_DEVICE_TIGON3_57780 0x1692 #define TG3PCI_DEVICE_TIGON3_57760 0x1690 #define TG3PCI_DEVICE_TIGON3_57790 0x1694 -#define TG3PCI_DEVICE_TIGON3_57720 0x168c +#define TG3PCI_DEVICE_TIGON3_57788 0x1691 /* 0x04 --> 0x64 unused */ #define TG3PCI_MSI_DATA 0x00000064 /* 0x66 --> 0x68 unused */ --- linux-ec2-2.6.31.orig/drivers/net/Kconfig +++ linux-ec2-2.6.31/drivers/net/Kconfig @@ -2762,9 +2762,9 @@ source "drivers/s390/net/Kconfig" -config XEN_NETDEV_FRONTEND +config XEN_NETFRONT tristate "Xen network device frontend driver" - depends on XEN + depends on PARAVIRT_XEN default y help The network device frontend driver allows the kernel to --- linux-ec2-2.6.31.orig/drivers/net/sky2.c +++ linux-ec2-2.6.31/drivers/net/sky2.c @@ -1455,7 +1455,6 @@ if (ramsize > 0) { u32 rxspace; - hw->flags |= SKY2_HW_RAM_BUFFER; pr_debug(PFX "%s: ram buffer %dK\n", dev->name, ramsize); if (ramsize < 16) rxspace = ramsize / 2; @@ -2942,6 +2941,9 @@ ++hw->ports; } + if (sky2_read8(hw, B2_E_0)) + hw->flags |= SKY2_HW_RAM_BUFFER; + return 0; } @@ -4526,6 +4528,8 @@ goto err_out_free_netdev; } + netif_carrier_off(dev); + netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT); err = request_irq(pdev->irq, sky2_intr, --- linux-ec2-2.6.31.orig/drivers/net/tun.c +++ linux-ec2-2.6.31/drivers/net/tun.c @@ -943,8 +943,6 @@ char *name; unsigned long flags = 0; - err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -958,7 +956,7 @@ flags |= TUN_TAP_DEV; name = "tap%d"; } else - goto failed; + return -EINVAL; if (*ifr->ifr_name) name = ifr->ifr_name; --- linux-ec2-2.6.31.orig/drivers/net/acenic.c +++ linux-ec2-2.6.31/drivers/net/acenic.c @@ -1209,7 +1209,8 @@ memset(ap->info, 0, sizeof(struct ace_info)); memset(ap->skb, 0, sizeof(struct ace_skb)); - if (ace_load_firmware(dev)) + ecode = ace_load_firmware(dev); + if (ecode) goto init_error; ap->fw_running = 0; --- linux-ec2-2.6.31.orig/drivers/net/smc91x.c +++ linux-ec2-2.6.31/drivers/net/smc91x.c @@ -2283,7 +2283,7 @@ ndev->irq = ires->start; - if (ires->flags & IRQF_TRIGGER_MASK) + if (irq_flags == -1 || ires->flags & IRQF_TRIGGER_MASK) irq_flags = ires->flags & IRQF_TRIGGER_MASK; 
ret = smc_request_attrib(pdev, ndev); --- linux-ec2-2.6.31.orig/drivers/net/Makefile +++ linux-ec2-2.6.31/drivers/net/Makefile @@ -157,7 +157,7 @@ obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLHC) += slhc.o -obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o +obj-$(CONFIG_XEN_NETFRONT) += xen-netfront.o obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_IFB) += ifb.o --- linux-ec2-2.6.31.orig/drivers/net/r8169.c +++ linux-ec2-2.6.31/drivers/net/r8169.c @@ -182,7 +182,12 @@ MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl); -static int rx_copybreak = 200; +/* + * we set our copybreak very high so that we don't have + * to allocate 16k frames all the time (see the note in + * rtl8169_open()) + */ +static int rx_copybreak = 16383; static int use_dac; static struct { u32 msg_enable; @@ -2163,6 +2168,13 @@ dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; #endif + /* Ubuntu temporary workaround for bug #76489, disable + * NETIF_F_TSO by default for RTL8111/8168B chipsets. + * People can re-enable if required */ + if (tp->mac_version == RTL_GIGA_MAC_VER_11 + || tp->mac_version == RTL_GIGA_MAC_VER_12) + dev->features &= ~NETIF_F_TSO; + tp->intr_mask = 0xffff; tp->align = cfg->align; tp->hw_start = cfg->hw_start; @@ -2227,11 +2239,15 @@ } static void rtl8169_set_rxbufsize(struct rtl8169_private *tp, - struct net_device *dev) + unsigned int mtu) { - unsigned int mtu = dev->mtu; + unsigned int max_frame = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; - tp->rx_buf_sz = (mtu > RX_BUF_SIZE) ? mtu + ETH_HLEN + 8 : RX_BUF_SIZE; + if (max_frame != 16383) + printk(KERN_WARNING "WARNING! Changing the MTU on this NIC " + "may lead to frame reception errors!\n"); + + tp->rx_buf_sz = (max_frame > RX_BUF_SIZE) ? max_frame : RX_BUF_SIZE; } static int rtl8169_open(struct net_device *dev) @@ -2241,7 +2257,17 @@ int retval = -ENOMEM; - rtl8169_set_rxbufsize(tp, dev); + /* + * Note that we use a magic value here; it's weird, I know. It's + * done because some subset of rtl8169 hardware suffers from a + * problem in which received frames longer than the size set in + * the RxMaxSize register return garbage sizes. To avoid this we + * need to turn off filtering, which is done by setting a value + * of 16383 in the RxMaxSize register and allocating 16k frames + * to handle the largest possible rx value; that's what the magic + * math below does. + */ + rtl8169_set_rxbufsize(tp, 16383 - VLAN_ETH_HLEN - ETH_FCS_LEN); /* * Rx and Tx desscriptors needs 256 bytes alignment.
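The rtl8169 hunks above hinge on one piece of arithmetic: rtl8169_open() passes an MTU of 16383 - VLAN_ETH_HLEN - ETH_FCS_LEN so that rtl8169_set_rxbufsize() recomputes a max_frame of exactly 16383, the value programmed into RxMaxSize. A tiny standalone check of that math, assuming the usual kernel values ETH_HLEN = 14, VLAN_HLEN = 4 and ETH_FCS_LEN = 4 (as in <linux/if_ether.h>):

#include <stdio.h>

#define ETH_HLEN	14
#define VLAN_HLEN	4
#define VLAN_ETH_HLEN	(ETH_HLEN + VLAN_HLEN)	/* 18 */
#define ETH_FCS_LEN	4

/* mirrors the patched rtl8169_set_rxbufsize() arithmetic */
static unsigned int max_frame(unsigned int mtu)
{
	return mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
}

int main(void)
{
	/* open() passes 16383 - 18 - 4, so max_frame lands back on 16383
	 * and the "changing MTU" warning above stays quiet */
	printf("%u\n", max_frame(16383 - VLAN_ETH_HLEN - ETH_FCS_LEN));
	return 0;
}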
@@ -2892,7 +2918,7 @@ rtl8169_down(dev); - rtl8169_set_rxbufsize(tp, dev); + rtl8169_set_rxbufsize(tp, dev->mtu); ret = rtl8169_init_ring(dev); if (ret < 0) --- linux-ec2-2.6.31.orig/drivers/net/b44.c +++ linux-ec2-2.6.31/drivers/net/b44.c @@ -913,9 +913,6 @@ bp->istat = istat; __b44_disable_ints(bp); __napi_schedule(&bp->napi); - } else { - printk(KERN_ERR PFX "%s: Error, poll already scheduled\n", - dev->name); } irq_ack: @@ -1505,8 +1502,7 @@ for (k = 0; k< ethaddr_bytes; k++) { ppattern[offset + magicsync + (j * ETH_ALEN) + k] = macaddr[k]; - len++; - set_bit(len, (unsigned long *) pmask); + set_bit(len++, (unsigned long *) pmask); } } return len - 1; --- linux-ec2-2.6.31.orig/drivers/net/wireless/ray_cs.c +++ linux-ec2-2.6.31/drivers/net/wireless/ray_cs.c @@ -2878,7 +2878,7 @@ unsigned long count, void *data) { static char proc_essid[33]; - int len = count; + unsigned int len = count; if (len > 32) len = 32; --- linux-ec2-2.6.31.orig/drivers/net/wireless/ipw2x00/ipw2100.c +++ linux-ec2-2.6.31/drivers/net/wireless/ipw2x00/ipw2100.c @@ -6487,6 +6487,16 @@ } #endif +static void ipw2100_shutdown(struct pci_dev *pci_dev) +{ + struct ipw2100_priv *priv = pci_get_drvdata(pci_dev); + + /* Take down the device; powers it off, etc. */ + ipw2100_down(priv); + + pci_disable_device(pci_dev); +} + #define IPW2100_DEV_ID(x) { PCI_VENDOR_ID_INTEL, 0x1043, 0x8086, x } static struct pci_device_id ipw2100_pci_id_table[] __devinitdata = { @@ -6550,6 +6560,7 @@ .suspend = ipw2100_suspend, .resume = ipw2100_resume, #endif + .shutdown = ipw2100_shutdown, }; /** --- linux-ec2-2.6.31.orig/drivers/net/wireless/ipw2x00/ipw2200.c +++ linux-ec2-2.6.31/drivers/net/wireless/ipw2x00/ipw2200.c @@ -89,7 +89,7 @@ static u32 ipw_debug_level; static int associate; static int auto_create = 1; -static int led = 0; +static int led = 1; static int disable = 0; static int bt_coexist = 0; static int hwcrypto = 0; @@ -11964,7 +11964,7 @@ MODULE_PARM_DESC(auto_create, "auto create adhoc network (default on)"); module_param(led, int, 0444); -MODULE_PARM_DESC(led, "enable led control on some systems (default 0 off)"); +MODULE_PARM_DESC(led, "enable led control on some systems (default 1 on)"); module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "debug output mask"); --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ar9170/usb.c +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ar9170/usb.c @@ -64,6 +64,8 @@ { USB_DEVICE(0x0cf3, 0x9170) }, /* Atheros TG121N */ { USB_DEVICE(0x0cf3, 0x1001) }, + /* TP-Link TL-WN821N v2 */ + { USB_DEVICE(0x0cf3, 0x1002) }, /* Cace Airpcap NX */ { USB_DEVICE(0xcace, 0x0300) }, /* D-Link DWA 160A */ --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ath5k/reset.c +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ath5k/reset.c @@ -258,29 +258,35 @@ if (!set_chip) goto commit; - /* Preserve sleep duration */ data = ath5k_hw_reg_read(ah, AR5K_SLEEP_CTL); + + /* If card is down we 'll get 0xffff... 
so we + * need to clean this up before we write the register + */ if (data & 0xffc00000) data = 0; else - data = data & 0xfffcffff; + /* Preserve sleep duration etc */ + data = data & ~AR5K_SLEEP_CTL_SLE; - ath5k_hw_reg_write(ah, data, AR5K_SLEEP_CTL); + ath5k_hw_reg_write(ah, data | AR5K_SLEEP_CTL_SLE_WAKE, + AR5K_SLEEP_CTL); udelay(15); - for (i = 50; i > 0; i--) { + for (i = 200; i > 0; i--) { /* Check if the chip did wake up */ if ((ath5k_hw_reg_read(ah, AR5K_PCICFG) & AR5K_PCICFG_SPWR_DN) == 0) break; /* Wait a bit and retry */ - udelay(200); - ath5k_hw_reg_write(ah, data, AR5K_SLEEP_CTL); + udelay(50); + ath5k_hw_reg_write(ah, data | AR5K_SLEEP_CTL_SLE_WAKE, + AR5K_SLEEP_CTL); } /* Fail if the chip didn't wake up */ - if (i <= 0) + if (i == 0) return -EIO; break; @@ -297,6 +303,64 @@ } /* + * Put device on hold + * + * Put MAC and Baseband on warm reset and + * keep that state (don't clean sleep control + * register). After this MAC and Baseband are + * disabled and a full reset is needed to come + * back. This way we save as much power as possible + * without putting the card into full sleep. + */ +int ath5k_hw_on_hold(struct ath5k_hw *ah) +{ + struct pci_dev *pdev = ah->ah_sc->pdev; + u32 bus_flags; + int ret; + + /* Make sure device is awake */ + ret = ath5k_hw_set_power(ah, AR5K_PM_AWAKE, true, 0); + if (ret) { + ATH5K_ERR(ah->ah_sc, "failed to wakeup the MAC Chip\n"); + return ret; + } + + /* + * Put chipset on warm reset... + * + * Note: putting the PCI core on warm reset on PCI-E cards + * causes the card to hang and always return 0xffff... so + * we ignore that flag for PCI-E cards. On PCI cards + * this flag gets cleared after 64 PCI clocks. + */ + bus_flags = (pdev->is_pcie) ? 0 : AR5K_RESET_CTL_PCI; + + if (ah->ah_version == AR5K_AR5210) { + ret = ath5k_hw_nic_reset(ah, AR5K_RESET_CTL_PCU | + AR5K_RESET_CTL_MAC | AR5K_RESET_CTL_DMA | + AR5K_RESET_CTL_PHY | AR5K_RESET_CTL_PCI); + mdelay(2); + } else { + ret = ath5k_hw_nic_reset(ah, AR5K_RESET_CTL_PCU | + AR5K_RESET_CTL_BASEBAND | bus_flags); + } + + if (ret) { + ATH5K_ERR(ah->ah_sc, "failed to put device on warm reset\n"); + return -EIO; + } + + /* ...wakeup again! */ + ret = ath5k_hw_set_power(ah, AR5K_PM_AWAKE, true, 0); + if (ret) { + ATH5K_ERR(ah->ah_sc, "failed to put device on hold\n"); + return ret; + } + + return ret; +} + +/* + * Bring up MAC + PHY Chips and program PLL + * TODO: Half/Quarter rate support + */ @@ -319,6 +383,50 @@ return ret; } + /* + * Put chipset on warm reset... + * + * Note: putting the PCI core on warm reset on PCI-E cards + * causes the card to hang and always return 0xffff... so + * we ignore that flag for PCI-E cards. On PCI cards + * this flag gets cleared after 64 PCI clocks. + */ + bus_flags = (pdev->is_pcie) ?
0 : AR5K_RESET_CTL_PCI; + + if (ah->ah_version == AR5K_AR5210) { + ret = ath5k_hw_nic_reset(ah, AR5K_RESET_CTL_PCU | + AR5K_RESET_CTL_MAC | AR5K_RESET_CTL_DMA | + AR5K_RESET_CTL_PHY | AR5K_RESET_CTL_PCI); + mdelay(2); + } else { + ret = ath5k_hw_nic_reset(ah, AR5K_RESET_CTL_PCU | + AR5K_RESET_CTL_BASEBAND | bus_flags); + } + + if (ret) { + ATH5K_ERR(ah->ah_sc, "failed to reset the MAC Chip\n"); + return -EIO; + } + + /* ...wakeup again!...*/ + ret = ath5k_hw_set_power(ah, AR5K_PM_AWAKE, true, 0); + if (ret) { + ATH5K_ERR(ah->ah_sc, "failed to resume the MAC Chip\n"); + return ret; + } + + /* ...clear reset control register and pull device out of + * warm reset */ + if (ath5k_hw_nic_reset(ah, 0)) { + ATH5K_ERR(ah->ah_sc, "failed to warm reset the MAC Chip\n"); + return -EIO; + } + + /* On initialization skip PLL programming since we don't have + * a channel / mode set yet */ + if (initial) + return 0; + if (ah->ah_version != AR5K_AR5210) { /* * Get channel mode flags @@ -384,39 +492,6 @@ AR5K_PHY_TURBO); } - /* reseting PCI on PCI-E cards results card to hang - * and always return 0xffff... so we ingore that flag - * for PCI-E cards */ - bus_flags = (pdev->is_pcie) ? 0 : AR5K_RESET_CTL_PCI; - - /* Reset chipset */ - if (ah->ah_version == AR5K_AR5210) { - ret = ath5k_hw_nic_reset(ah, AR5K_RESET_CTL_PCU | - AR5K_RESET_CTL_MAC | AR5K_RESET_CTL_DMA | - AR5K_RESET_CTL_PHY | AR5K_RESET_CTL_PCI); - mdelay(2); - } else { - ret = ath5k_hw_nic_reset(ah, AR5K_RESET_CTL_PCU | - AR5K_RESET_CTL_BASEBAND | bus_flags); - } - if (ret) { - ATH5K_ERR(ah->ah_sc, "failed to reset the MAC Chip\n"); - return -EIO; - } - - /* ...wakeup again!*/ - ret = ath5k_hw_set_power(ah, AR5K_PM_AWAKE, true, 0); - if (ret) { - ATH5K_ERR(ah->ah_sc, "failed to resume the MAC Chip\n"); - return ret; - } - - /* ...final warm reset */ - if (ath5k_hw_nic_reset(ah, 0)) { - ATH5K_ERR(ah->ah_sc, "failed to warm reset the MAC Chip\n"); - return -EIO; - } - if (ah->ah_version != AR5K_AR5210) { /* ...update PLL if needed */ --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ath5k/base.c +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ath5k/base.c @@ -666,7 +666,6 @@ ath5k_led_off(sc); - free_irq(pdev->irq, sc); pci_save_state(pdev); pci_disable_device(pdev); pci_set_power_state(pdev, PCI_D3hot); @@ -694,18 +693,8 @@ */ pci_write_config_byte(pdev, 0x41, 0); - err = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc); - if (err) { - ATH5K_ERR(sc, "request_irq failed\n"); - goto err_no_irq; - } - ath5k_led_enable(sc); return 0; - -err_no_irq: - pci_disable_device(pdev); - return err; } #endif /* CONFIG_PM */ @@ -2445,27 +2434,29 @@ ret = ath5k_stop_locked(sc); if (ret == 0 && !test_bit(ATH_STAT_INVALID, sc->status)) { /* - * Set the chip in full sleep mode. Note that we are - * careful to do this only when bringing the interface - * completely to a stop. When the chip is in this state - * it must be carefully woken up or references to - * registers in the PCI clock domain may freeze the bus - * (and system). This varies by chip and is mostly an - * issue with newer parts that go to sleep more quickly. - */ - if (sc->ah->ah_mac_srev >= 0x78) { - /* - * XXX - * don't put newer MAC revisions > 7.8 to sleep because - * of the above mentioned problems - */ - ATH5K_DBG(sc, ATH5K_DEBUG_RESET, "mac version > 7.8, " - "not putting device to sleep\n"); - } else { - ATH5K_DBG(sc, ATH5K_DEBUG_RESET, - "putting device to full sleep\n"); - ath5k_hw_set_power(sc->ah, AR5K_PM_FULL_SLEEP, true, 0); - } + * Don't set the card in full sleep mode! 
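
As an aside, the hold sequence the hunks above add boils down to: wake the chip, assert warm reset on the MAC and baseband (skipping the PCI flag on PCI-E parts), then wake again so the baseband stays held while the bus remains usable. A minimal stand-alone C sketch of that ordering follows; all identifiers (chip_wake, warm_reset, RESET_*) are invented stand-ins, not the real ath5k API.

    /* Sketch only: models the order of operations, not real hardware I/O. */
    #include <stdbool.h>
    #include <stdio.h>

    #define RESET_PCU  0x1
    #define RESET_BB   0x2
    #define RESET_PCI  0x4

    static int chip_wake(void) { return 0; }  /* stand-in: wake the MAC */
    static int warm_reset(unsigned flags)
    {
        printf("warm reset flags 0x%x\n", flags);
        return 0;
    }

    /* Hold the chip: wake, assert warm reset, wake again. */
    static int chip_on_hold(bool is_pcie)
    {
        /* PCI-E cards hang if the PCI reset flag is asserted */
        unsigned bus = is_pcie ? 0 : RESET_PCI;

        if (chip_wake())
            return -1;
        if (warm_reset(RESET_PCU | RESET_BB | bus))
            return -1;
        return chip_wake();  /* MAC awake, baseband left on warm reset */
    }

    int main(void) { return chip_on_hold(true); }
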
+ + * a) When the device is in this state it must be carefully + * woken up or references to registers in the PCI clock + * domain may freeze the bus (and system). This varies + * by chip and is mostly an issue with newer parts + * (madwifi sources mentioned srev >= 0x78) that go to + * sleep more quickly. + * + * b) On older chips full sleep results in a weird behaviour + * during wakeup. I tested various cards with srev < 0x78 + * and they don't wake up after module reload, a second + * module reload is needed to bring the card up again. + * + * Until we figure out what's going on don't enable + * full chip sleep on any chip (this is what Legacy HAL + * and Sam's HAL do anyway). Instead, perform a full reset + * on the device (same as initial state after attach) and + * leave it idle (keep MAC/BB on warm reset) */ + ret = ath5k_hw_on_hold(sc->ah); + + ATH5K_DBG(sc, ATH5K_DEBUG_RESET, + "putting device to sleep\n"); } ath5k_txbuf_free(sc, sc->bbuf); @@ -2676,7 +2667,7 @@ sc->curchan = chan; sc->curband = &sc->sbands[chan->band]; } - ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, true); + ret = ath5k_hw_reset(ah, sc->opmode, sc->curchan, chan != NULL); if (ret) { ATH5K_ERR(sc, "can't reset hardware (%d)\n", ret); goto err; --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ath5k/attach.c +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ath5k/attach.c @@ -145,7 +145,7 @@ goto err_free; /* Bring device out of sleep and reset it's units */ - ret = ath5k_hw_nic_wakeup(ah, CHANNEL_B, true); + ret = ath5k_hw_nic_wakeup(ah, 0, true); if (ret) goto err_free; --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ath5k/eeprom.c +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ath5k/eeprom.c @@ -97,6 +97,7 @@ struct ath5k_eeprom_info *ee = &ah->ah_capabilities.cap_eeprom; int ret; u16 val; + u32 cksum, offset, eep_max = AR5K_EEPROM_INFO_MAX; /* * Read values from EEPROM and store them in the capability structure */ @@ -111,20 +112,44 @@ if (ah->ah_ee_version < AR5K_EEPROM_VERSION_3_0) return 0; -#ifdef notyet /* * Validate the checksum of the EEPROM date. There are some * devices with invalid EEPROMs. */ - for (cksum = 0, offset = 0; offset < AR5K_EEPROM_INFO_MAX; offset++) { + AR5K_EEPROM_READ(AR5K_EEPROM_SIZE_UPPER, val); + if (val) { + eep_max = (val & AR5K_EEPROM_SIZE_UPPER_MASK) << + AR5K_EEPROM_SIZE_ENDLOC_SHIFT; + AR5K_EEPROM_READ(AR5K_EEPROM_SIZE_LOWER, val); + eep_max = (eep_max | val) - AR5K_EEPROM_INFO_BASE; + + /* + * Fail safe check to prevent stupid loops due + * to busted EEPROMs. XXX: This value is likely too + * big still, waiting on a better value. + */ + if (eep_max > (3 * AR5K_EEPROM_INFO_MAX)) { + ATH5K_ERR(ah->ah_sc, "Invalid max custom EEPROM size: " + "%d (0x%04x) max expected: %d (0x%04x)\n", + eep_max, eep_max, + 3 * AR5K_EEPROM_INFO_MAX, + 3 * AR5K_EEPROM_INFO_MAX); + return -EIO; + } + } + + for (cksum = 0, offset = 0; offset < eep_max; offset++) { AR5K_EEPROM_READ(AR5K_EEPROM_INFO(offset), val); cksum ^= val; } if (cksum != AR5K_EEPROM_INFO_CKSUM) { - ATH5K_ERR(ah->ah_sc, "Invalid EEPROM checksum 0x%04x\n", cksum); + ATH5K_ERR(ah->ah_sc, "Invalid EEPROM " + "checksum: 0x%04x eep_max: 0x%04x (%s)\n", + cksum, eep_max, + eep_max == AR5K_EEPROM_INFO_MAX ? 
+ "default size" : "custom size"); return -EIO; } -#endif AR5K_EEPROM_READ_HDR(AR5K_EEPROM_ANT_GAIN(ah->ah_ee_version), ee_ant_gain); --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ath5k/eeprom.h +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ath5k/eeprom.h @@ -34,6 +34,14 @@ #define AR5K_EEPROM_RFKILL_POLARITY_S 1 #define AR5K_EEPROM_REG_DOMAIN 0x00bf /* EEPROM regdom */ + +/* FLASH(EEPROM) Defines for AR531X chips */ +#define AR5K_EEPROM_SIZE_LOWER 0x1b /* size info -- lower */ +#define AR5K_EEPROM_SIZE_UPPER 0x1c /* size info -- upper */ +#define AR5K_EEPROM_SIZE_UPPER_MASK 0xfff0 +#define AR5K_EEPROM_SIZE_UPPER_SHIFT 4 +#define AR5K_EEPROM_SIZE_ENDLOC_SHIFT 12 + #define AR5K_EEPROM_CHECKSUM 0x00c0 /* EEPROM checksum */ #define AR5K_EEPROM_INFO_BASE 0x00c0 /* EEPROM header */ #define AR5K_EEPROM_INFO_MAX (0x400 - AR5K_EEPROM_INFO_BASE) --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ath5k/phy.c +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ath5k/phy.c @@ -740,13 +740,22 @@ AR5K_RF_XPD_GAIN, true); } else { - /* TODO: Set high and low gain bits */ - ath5k_hw_rfb_op(ah, rf_regs, - ee->ee_x_gain[ee_mode], + u8 *pdg_curve_to_idx = ee->ee_pdc_to_idx[ee_mode]; + if (ee->ee_pd_gains[ee_mode] > 1) { + ath5k_hw_rfb_op(ah, rf_regs, + pdg_curve_to_idx[0], AR5K_RF_PD_GAIN_LO, true); - ath5k_hw_rfb_op(ah, rf_regs, - ee->ee_x_gain[ee_mode], + ath5k_hw_rfb_op(ah, rf_regs, + pdg_curve_to_idx[1], AR5K_RF_PD_GAIN_HI, true); + } else { + ath5k_hw_rfb_op(ah, rf_regs, + pdg_curve_to_idx[0], + AR5K_RF_PD_GAIN_LO, true); + ath5k_hw_rfb_op(ah, rf_regs, + pdg_curve_to_idx[0], + AR5K_RF_PD_GAIN_HI, true); + } /* Lower synth voltage on Rev 2 */ ath5k_hw_rfb_op(ah, rf_regs, 2, @@ -1897,8 +1906,9 @@ s16 min_pwrL, min_pwrR; s16 pwr_i; - if (WARN_ON(stepL[0] == stepL[1] || stepR[0] == stepR[1])) - return 0; + /* Some vendors write the same pcdac value twice !!! 
*/ + if (stepL[0] == stepL[1] || stepR[0] == stepR[1]) + return max(pwrL[0], pwrR[0]); if (pwrL[0] == pwrL[1]) min_pwrL = pwrL[0]; @@ -2921,8 +2931,6 @@ ATH5K_ERR(ah->ah_sc, "invalid tx power: %u\n", txpower); return -EINVAL; } - if (txpower == 0) - txpower = AR5K_TUNE_DEFAULT_TXPOWER; /* Reset TX power values */ memset(&ah->ah_txpower, 0, sizeof(ah->ah_txpower)); --- linux-ec2-2.6.31.orig/drivers/net/wireless/ath/ath5k/ath5k.h +++ linux-ec2-2.6.31/drivers/net/wireless/ath/ath5k/ath5k.h @@ -1164,6 +1164,7 @@ /* Reset Functions */ extern int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial); +extern int ath5k_hw_on_hold(struct ath5k_hw *ah); extern int ath5k_hw_reset(struct ath5k_hw *ah, enum nl80211_iftype op_mode, struct ieee80211_channel *channel, bool change_channel); /* Power management functions */ extern int ath5k_hw_set_power(struct ath5k_hw *ah, enum ath5k_power_mode mode, bool set_chip, u16 sleep_duration); --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-prph.h +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-prph.h @@ -80,6 +80,8 @@ #define APMG_RFKILL_REG (APMG_BASE + 0x0014) #define APMG_RTC_INT_STT_REG (APMG_BASE + 0x001c) #define APMG_RTC_INT_MSK_REG (APMG_BASE + 0x0020) +#define APMG_DIGITAL_SVR_REG (APMG_BASE + 0x0058) +#define APMG_ANALOG_SVR_REG (APMG_BASE + 0x006C) #define APMG_CLK_VAL_DMA_CLK_RQT (0x00000200) #define APMG_CLK_VAL_BSM_CLK_RQT (0x00000800) @@ -91,7 +93,8 @@ #define APMG_PS_CTRL_VAL_PWR_SRC_VMAIN (0x00000000) #define APMG_PS_CTRL_VAL_PWR_SRC_MAX (0x01000000) /* 3945 only */ #define APMG_PS_CTRL_VAL_PWR_SRC_VAUX (0x02000000) - +#define APMG_SVR_VOLTAGE_CONFIG_BIT_MSK (0x000001E0) /* bit 8:5 */ +#define APMG_SVR_DIGITAL_VOLTAGE_1_32 (0x00000060) #define APMG_PCIDEV_STT_VAL_L1_ACT_DIS (0x00000800) --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-eeprom.h +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-eeprom.h @@ -180,8 +180,14 @@ #define EEPROM_5050_EEPROM_VERSION (0x21E) /* OTP */ -#define OTP_LOWER_BLOCKS_TOTAL (3) -#define OTP_BLOCK_SIZE (0x400) +/* lower blocks contain EEPROM image and calibration data */ +#define OTP_LOW_IMAGE_SIZE (2 * 512 * sizeof(u16)) /* 2 KB */ +/* high blocks contain PAPD data */ +#define OTP_HIGH_IMAGE_SIZE_6x00 (6 * 512 * sizeof(u16)) /* 6 KB */ +#define OTP_HIGH_IMAGE_SIZE_1000 (0x200 * sizeof(u16)) /* 1024 bytes */ +#define OTP_MAX_LL_ITEMS_1000 (3) /* OTP blocks for 1000 */ +#define OTP_MAX_LL_ITEMS_6x00 (4) /* OTP blocks for 6x00 */ +#define OTP_MAX_LL_ITEMS_6x50 (7) /* OTP blocks for 6x50 */ /* 2.4 GHz */ extern const u8 iwl_eeprom_band_1[14]; --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-agn.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -115,9 +115,6 @@ /* always get timestamp with Rx frame */ priv->staging_rxon.flags |= RXON_FLG_TSF2HOST_MSK; - /* allow CTS-to-self if possible. this is relevant only for - * 5000, but will not damage 4965 */ - priv->staging_rxon.flags |= RXON_FLG_SELF_CTS_EN; ret = iwl_check_rxon_cmd(priv); if (ret) { @@ -217,6 +214,13 @@ "Could not send WEP static key.\n"); } + /* + * allow CTS-to-self if possible for new association. 
+ * this is relevant only for 5000 series and up, + * but will not damage 4965 + */ + priv->staging_rxon.flags |= RXON_FLG_SELF_CTS_EN; + /* Apply the new configuration * RXON assoc doesn't clear the station table in uCode, */ @@ -1348,7 +1352,7 @@ */ static int iwl_read_ucode(struct iwl_priv *priv) { - struct iwl_ucode *ucode; + struct iwl_ucode_header *ucode; int ret = -EINVAL, index; const struct firmware *ucode_raw; const char *name_pre = priv->cfg->fw_name_pre; @@ -1357,7 +1361,8 @@ char buf[25]; u8 *src; size_t len; - u32 api_ver, inst_size, data_size, init_size, init_data_size, boot_size; + u32 api_ver, build; + u32 inst_size, data_size, init_size, init_data_size, boot_size; /* Ask kernel firmware_class module to get the boot firmware off disk. * request_firmware() is synchronous, file is in memory on return. */ @@ -1387,23 +1392,26 @@ if (ret < 0) goto error; - /* Make sure that we got at least our header! */ - if (ucode_raw->size < sizeof(*ucode)) { + /* Make sure that we got at least the v1 header! */ + if (ucode_raw->size < priv->cfg->ops->ucode->get_header_size(1)) { IWL_ERR(priv, "File size way too small!\n"); ret = -EINVAL; goto err_release; } /* Data from ucode file: header followed by uCode images */ - ucode = (void *)ucode_raw->data; + ucode = (struct iwl_ucode_header *)ucode_raw->data; priv->ucode_ver = le32_to_cpu(ucode->ver); api_ver = IWL_UCODE_API(priv->ucode_ver); - inst_size = le32_to_cpu(ucode->inst_size); - data_size = le32_to_cpu(ucode->data_size); - init_size = le32_to_cpu(ucode->init_size); - init_data_size = le32_to_cpu(ucode->init_data_size); - boot_size = le32_to_cpu(ucode->boot_size); + build = priv->cfg->ops->ucode->get_build(ucode, api_ver); + inst_size = priv->cfg->ops->ucode->get_inst_size(ucode, api_ver); + data_size = priv->cfg->ops->ucode->get_data_size(ucode, api_ver); + init_size = priv->cfg->ops->ucode->get_init_size(ucode, api_ver); + init_data_size = + priv->cfg->ops->ucode->get_init_data_size(ucode, api_ver); + boot_size = priv->cfg->ops->ucode->get_boot_size(ucode, api_ver); + src = priv->cfg->ops->ucode->get_data(ucode, api_ver); /* api_ver should match the api version forming part of the * firmware filename ... but we don't check for that and only rely @@ -1429,6 +1437,9 @@ IWL_UCODE_API(priv->ucode_ver), IWL_UCODE_SERIAL(priv->ucode_ver)); + if (build) + IWL_DEBUG_INFO(priv, "Build %u\n", build); + IWL_DEBUG_INFO(priv, "f/w package hdr ucode version raw = 0x%x\n", priv->ucode_ver); IWL_DEBUG_INFO(priv, "f/w package hdr runtime inst size = %u\n", @@ -1443,12 +1454,14 @@ boot_size); /* Verify size of file vs. image size info in file's header */ - if (ucode_raw->size < sizeof(*ucode) + + if (ucode_raw->size != + priv->cfg->ops->ucode->get_header_size(api_ver) + inst_size + data_size + init_size + init_data_size + boot_size) { - IWL_DEBUG_INFO(priv, "uCode file size %d too small\n", - (int)ucode_raw->size); + IWL_DEBUG_INFO(priv, + "uCode file size %d does not match expected size\n", + (int)ucode_raw->size); ret = -EINVAL; goto err_release; } @@ -1528,42 +1541,42 @@ /* Copy images into buffers for card's bus-master reads ... 
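
The copy loop that follows replaces fixed offsets such as &ucode->data[inst_size + data_size] with a cursor that advances by each section's length, which is what lets the same code handle both the v1 and v2 header layouts. A minimal stand-alone sketch of that cursor walk, with invented section sizes and local buffers standing in for the DMA targets:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned char blob[10] = "ABCDEFGHIJ";   /* fake firmware payload */
        size_t sizes[3] = { 4, 3, 3 };           /* inst, data, boot sizes */
        unsigned char inst[4], data[3], boot[3];
        unsigned char *dst[3] = { inst, data, boot };
        const unsigned char *src = blob;         /* cursor into the blob */

        for (int i = 0; i < 3; i++) {
            memcpy(dst[i], src, sizes[i]);       /* copy one section ... */
            src += sizes[i];                     /* ... then advance */
        }
        printf("%.3s\n", boot);                  /* prints "HIJ" */
        return 0;
    }
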
*/ /* Runtime instructions (first block of data in file) */ - src = &ucode->data[0]; - len = priv->ucode_code.len; + len = inst_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) uCode instr len %Zd\n", len); memcpy(priv->ucode_code.v_addr, src, len); + src += len; + IWL_DEBUG_INFO(priv, "uCode instr buf vaddr = 0x%p, paddr = 0x%08x\n", priv->ucode_code.v_addr, (u32)priv->ucode_code.p_addr); /* Runtime data (2nd block) * NOTE: Copy into backup buffer will be done in iwl_up() */ - src = &ucode->data[inst_size]; - len = priv->ucode_data.len; + len = data_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) uCode data len %Zd\n", len); memcpy(priv->ucode_data.v_addr, src, len); memcpy(priv->ucode_data_backup.v_addr, src, len); + src += len; /* Initialization instructions (3rd block) */ if (init_size) { - src = &ucode->data[inst_size + data_size]; - len = priv->ucode_init.len; + len = init_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) init instr len %Zd\n", len); memcpy(priv->ucode_init.v_addr, src, len); + src += len; } /* Initialization data (4th block) */ if (init_data_size) { - src = &ucode->data[inst_size + data_size + init_size]; - len = priv->ucode_init_data.len; + len = init_data_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) init data len %Zd\n", len); memcpy(priv->ucode_init_data.v_addr, src, len); + src += len; } /* Bootstrap instructions (5th block) */ - src = &ucode->data[inst_size + data_size + init_size + init_data_size]; - len = priv->ucode_boot.len; + len = boot_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) boot instr len %Zd\n", len); memcpy(priv->ucode_boot.v_addr, src, len); @@ -2206,7 +2219,7 @@ priv->is_open = 0; - if (iwl_is_ready_rf(priv)) { + if (iwl_is_ready_rf(priv) || test_bit(STATUS_SCAN_HW, &priv->status)) { /* stop mac, cancel any scan request and clear * RXON_FILTER_ASSOC_MSK BIT */ --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-dev.h +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -66,6 +66,7 @@ /* shared structures from iwl-5000.c */ extern struct iwl_mod_params iwl50_mod_params; extern struct iwl_ops iwl5000_ops; +extern struct iwl_ucode_ops iwl5000_ucode; extern struct iwl_lib_ops iwl5000_lib; extern struct iwl_hcmd_ops iwl5000_hcmd; extern struct iwl_hcmd_utils_ops iwl5000_hcmd_utils; @@ -525,15 +526,29 @@ }; /* uCode file layout */ -struct iwl_ucode { - __le32 ver; /* major/minor/API/serial */ - __le32 inst_size; /* bytes of runtime instructions */ - __le32 data_size; /* bytes of runtime data */ - __le32 init_size; /* bytes of initialization instructions */ - __le32 init_data_size; /* bytes of initialization data */ - __le32 boot_size; /* bytes of bootstrap instructions */ - u8 data[0]; /* data in same order as "size" elements */ +struct iwl_ucode_header { + __le32 ver; /* major/minor/API/serial */ + union { + struct { + __le32 inst_size; /* bytes of runtime code */ + __le32 data_size; /* bytes of runtime data */ + __le32 init_size; /* bytes of init code */ + __le32 init_data_size; /* bytes of init data */ + __le32 boot_size; /* bytes of bootstrap code */ + u8 data[0]; /* in same order as sizes */ + } v1; + struct { + __le32 build; /* build number */ + __le32 inst_size; /* bytes of runtime code */ + __le32 data_size; /* bytes of runtime data */ + __le32 init_size; /* bytes of init code */ + __le32 init_data_size; /* bytes of init data */ + __le32 boot_size; /* bytes of bootstrap code */ + u8 data[0]; /* in same order as sizes */ + } v2; + } u; }; +#define UCODE_HEADER_SIZE(ver) ((ver) == 1 ? 
24 : 28) struct iwl4965_ibss_seq { u8 mac[ETH_ALEN]; @@ -820,6 +835,18 @@ NVM_DEVICE_TYPE_OTP, }; +/* + * Two types of OTP memory access modes + * IWL_OTP_ACCESS_ABSOLUTE - absolute address mode, + * based on physical memory addressing + * IWL_OTP_ACCESS_RELATIVE - relative address mode, + * based on logical memory addressing + */ +enum iwl_access_mode { + IWL_OTP_ACCESS_ABSOLUTE, + IWL_OTP_ACCESS_RELATIVE, +}; + /* interrupt statistics */ struct isr_statistics { u32 hw; --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-5000.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -239,6 +239,13 @@ APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS, ~APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS); + if ((priv->hw_rev & CSR_HW_REV_TYPE_MSK) == CSR_HW_REV_TYPE_1000) { + /* Setting digital SVR for 1000 card to 1.32V */ + iwl_set_bits_mask_prph(priv, APMG_DIGITAL_SVR_REG, + APMG_SVR_DIGITAL_VOLTAGE_1_32, + ~APMG_SVR_VOLTAGE_CONFIG_BIT_MSK); + } + spin_unlock_irqrestore(&priv->lock, flags); } @@ -1426,6 +1433,44 @@ return max_rssi - agc - IWL49_RSSI_OFFSET; } +#define IWL5000_UCODE_GET(item) \ +static u32 iwl5000_ucode_get_##item(const struct iwl_ucode_header *ucode,\ + u32 api_ver) \ +{ \ + if (api_ver <= 2) \ + return le32_to_cpu(ucode->u.v1.item); \ + return le32_to_cpu(ucode->u.v2.item); \ +} + +static u32 iwl5000_ucode_get_header_size(u32 api_ver) +{ + if (api_ver <= 2) + return UCODE_HEADER_SIZE(1); + return UCODE_HEADER_SIZE(2); +} + +static u32 iwl5000_ucode_get_build(const struct iwl_ucode_header *ucode, + u32 api_ver) +{ + if (api_ver <= 2) + return 0; + return le32_to_cpu(ucode->u.v2.build); +} + +static u8 *iwl5000_ucode_get_data(const struct iwl_ucode_header *ucode, + u32 api_ver) +{ + if (api_ver <= 2) + return (u8 *) ucode->u.v1.data; + return (u8 *) ucode->u.v2.data; +} + +IWL5000_UCODE_GET(inst_size); +IWL5000_UCODE_GET(data_size); +IWL5000_UCODE_GET(init_size); +IWL5000_UCODE_GET(init_data_size); +IWL5000_UCODE_GET(boot_size); + struct iwl_hcmd_ops iwl5000_hcmd = { .rxon_assoc = iwl5000_send_rxon_assoc, .commit_rxon = iwl_commit_rxon, @@ -1441,6 +1486,17 @@ .calc_rssi = iwl5000_calc_rssi, }; +struct iwl_ucode_ops iwl5000_ucode = { + .get_header_size = iwl5000_ucode_get_header_size, + .get_build = iwl5000_ucode_get_build, + .get_inst_size = iwl5000_ucode_get_inst_size, + .get_data_size = iwl5000_ucode_get_data_size, + .get_init_size = iwl5000_ucode_get_init_size, + .get_init_data_size = iwl5000_ucode_get_init_data_size, + .get_boot_size = iwl5000_ucode_get_boot_size, + .get_data = iwl5000_ucode_get_data, +}; + struct iwl_lib_ops iwl5000_lib = { .set_hw_params = iwl5000_hw_set_hw_params, .txq_update_byte_cnt_tbl = iwl5000_txq_update_byte_cnt_tbl, @@ -1542,12 +1598,14 @@ }; struct iwl_ops iwl5000_ops = { + .ucode = &iwl5000_ucode, .lib = &iwl5000_lib, .hcmd = &iwl5000_hcmd, .utils = &iwl5000_hcmd_utils, }; static struct iwl_ops iwl5150_ops = { + .ucode = &iwl5000_ucode, .lib = &iwl5150_lib, .hcmd = &iwl5000_hcmd, .utils = &iwl5000_hcmd_utils, --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-eeprom.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-eeprom.c @@ -152,6 +152,19 @@ } EXPORT_SYMBOL(iwlcore_eeprom_verify_signature); +static void iwl_set_otp_access(struct iwl_priv *priv, enum iwl_access_mode mode) +{ + u32 otpgp; + + otpgp = iwl_read32(priv, CSR_OTP_GP_REG); + if (mode == IWL_OTP_ACCESS_ABSOLUTE) + iwl_clear_bit(priv, CSR_OTP_GP_REG, + CSR_OTP_GP_REG_OTP_ACCESS_MODE); + else + iwl_set_bit(priv, CSR_OTP_GP_REG, + CSR_OTP_GP_REG_OTP_ACCESS_MODE); +} + static int 
iwlcore_get_nvm_type(struct iwl_priv *priv) { u32 otpgp; @@ -249,6 +262,123 @@ return ret; } +static int iwl_read_otp_word(struct iwl_priv *priv, u16 addr, u16 *eeprom_data) +{ + int ret = 0; + u32 r; + u32 otpgp; + + _iwl_write32(priv, CSR_EEPROM_REG, + CSR_EEPROM_REG_MSK_ADDR & (addr << 1)); + ret = iwl_poll_direct_bit(priv, CSR_EEPROM_REG, + CSR_EEPROM_REG_READ_VALID_MSK, + IWL_EEPROM_ACCESS_TIMEOUT); + if (ret < 0) { + IWL_ERR(priv, "Time out reading OTP[%d]\n", addr); + return ret; + } + r = _iwl_read_direct32(priv, CSR_EEPROM_REG); + /* check for ECC errors: */ + otpgp = iwl_read32(priv, CSR_OTP_GP_REG); + if (otpgp & CSR_OTP_GP_REG_ECC_UNCORR_STATUS_MSK) { + /* stop in this case */ + /* set the uncorrectable OTP ECC bit for acknowledgement */ + iwl_set_bit(priv, CSR_OTP_GP_REG, + CSR_OTP_GP_REG_ECC_UNCORR_STATUS_MSK); + IWL_ERR(priv, "Uncorrectable OTP ECC error, abort OTP read\n"); + return -EINVAL; + } + if (otpgp & CSR_OTP_GP_REG_ECC_CORR_STATUS_MSK) { + /* continue in this case */ + /* set the correctable OTP ECC bit for acknowledgement */ + iwl_set_bit(priv, CSR_OTP_GP_REG, + CSR_OTP_GP_REG_ECC_CORR_STATUS_MSK); + IWL_ERR(priv, "Correctable OTP ECC error, continue read\n"); + } + *eeprom_data = le16_to_cpu((__force __le16)(r >> 16)); + return 0; +} + +/* + * iwl_is_otp_empty: check for empty OTP + */ +static bool iwl_is_otp_empty(struct iwl_priv *priv) +{ + u16 next_link_addr = 0, link_value; + bool is_empty = false; + + /* locate the beginning of OTP link list */ + if (!iwl_read_otp_word(priv, next_link_addr, &link_value)) { + if (!link_value) { + IWL_ERR(priv, "OTP is empty\n"); + is_empty = true; + } + } else { + IWL_ERR(priv, "Unable to read first block of OTP list.\n"); + is_empty = true; + } + + return is_empty; +} + + +/* + * iwl_find_otp_image: find EEPROM image in OTP + * Finds the OTP block that contains the EEPROM image. + * The last valid block on the link list (the block _before_ the last block) + * is the block we should read and use to configure the device. + * If all the available OTP blocks are full, the last block will be the block + * we should read and use to configure the device. 
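
A rough model of the block walk described above, using a plain array in place of OTP reads: each block begins with a link word holding the word index of the next block, a zero link terminates the list, and the image starts right after the link word of the last block that still has a valid link. All names and data below are invented for illustration.

    #include <stdio.h>

    static int find_last_block(const unsigned short *otp, unsigned max_blocks,
                               unsigned *image_start)
    {
        unsigned addr = 0, valid = 0, used = 0;

        if (!otp[0])
            return -1;                     /* empty OTP */
        do {
            valid = addr;                  /* remember the current block */
            addr = otp[addr];              /* follow the link word */
            if (!otp[addr]) {              /* next link is 0: list ends */
                *image_start = valid + 1;  /* skip the link word itself */
                return 0;
            }
            used++;
        } while (used <= max_blocks);      /* bound busted link lists */
        return -1;                         /* no valid terminator found */
    }

    int main(void)
    {
        unsigned short otp[8] = { 2, 0x11, 4, 0x22, 0, 0x33 };
        unsigned start;

        if (!find_last_block(otp, 4, &start))
            printf("image at word %u\n", start); /* prints word 3 */
        return 0;
    }
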
+ * Only perform this operation if shadow RAM is disabled. + */ +static int iwl_find_otp_image(struct iwl_priv *priv, + u16 *validblockaddr) +{ + u16 next_link_addr = 0, link_value = 0, valid_addr; + int usedblocks = 0; + + /* set addressing mode to absolute to traverse the link list */ + iwl_set_otp_access(priv, IWL_OTP_ACCESS_ABSOLUTE); + + /* checking for empty OTP or error */ + if (iwl_is_otp_empty(priv)) + return -EINVAL; + + /* + * start traversing the link list + * until we reach the max number of OTP blocks; + * different devices have different numbers of OTP blocks + */ + do { + /* save the current valid block address and + * check for more blocks on the link list + */ + valid_addr = next_link_addr; + next_link_addr = link_value * sizeof(u16); + IWL_DEBUG_INFO(priv, "OTP blocks %d addr 0x%x\n", + usedblocks, next_link_addr); + if (iwl_read_otp_word(priv, next_link_addr, &link_value)) + return -EINVAL; + if (!link_value) { + /* + * reached the end of the link list: return success and + * set the address to point to the starting address + * of the image + */ + *validblockaddr = valid_addr; + /* skip first 2 bytes (link list pointer) */ + *validblockaddr += 2; + return 0; + } + /* more in the link list, continue */ + usedblocks++; + } while (usedblocks <= priv->cfg->max_ll_items); + + /* OTP has no valid blocks */ + IWL_DEBUG_INFO(priv, "OTP has no valid blocks\n"); + return -EINVAL; +} + /** * iwl_eeprom_init - read EEPROM contents * @@ -263,14 +393,13 @@ int sz; int ret; u16 addr; - u32 otpgp; + u16 validblockaddr = 0; + u16 cache_addr = 0; priv->nvm_device_type = iwlcore_get_nvm_type(priv); /* allocate eeprom */ - if (priv->nvm_device_type == NVM_DEVICE_TYPE_OTP) - priv->cfg->eeprom_size = - OTP_BLOCK_SIZE * OTP_LOWER_BLOCKS_TOTAL; + IWL_DEBUG_INFO(priv, "NVM size = %d\n", priv->cfg->eeprom_size); sz = priv->cfg->eeprom_size; priv->eeprom = kzalloc(sz, GFP_KERNEL); if (!priv->eeprom) { @@ -298,46 +427,31 @@ if (ret) { IWL_ERR(priv, "Failed to initialize OTP access.\n"); ret = -ENOENT; - goto err; + goto done; } _iwl_write32(priv, CSR_EEPROM_GP, iwl_read32(priv, CSR_EEPROM_GP) & ~CSR_EEPROM_GP_IF_OWNER_MSK); - /* clear */ - _iwl_write32(priv, CSR_OTP_GP_REG, - iwl_read32(priv, CSR_OTP_GP_REG) | + + iwl_set_bit(priv, CSR_OTP_GP_REG, CSR_OTP_GP_REG_ECC_CORR_STATUS_MSK | CSR_OTP_GP_REG_ECC_UNCORR_STATUS_MSK); - - for (addr = 0; addr < sz; addr += sizeof(u16)) { - u32 r; - - _iwl_write32(priv, CSR_EEPROM_REG, - CSR_EEPROM_REG_MSK_ADDR & (addr << 1)); - - ret = iwl_poll_direct_bit(priv, CSR_EEPROM_REG, - CSR_EEPROM_REG_READ_VALID_MSK, - IWL_EEPROM_ACCESS_TIMEOUT); - if (ret < 0) { - IWL_ERR(priv, "Time out reading OTP[%d]\n", addr); + /* traverse the linked list if shadow RAM is not supported */ + if (!priv->cfg->shadow_ram_support) { + if (iwl_find_otp_image(priv, &validblockaddr)) { + ret = -ENOENT; goto done; } - r = _iwl_read_direct32(priv, CSR_EEPROM_REG); - /* check for ECC errors: */ - otpgp = iwl_read32(priv, CSR_OTP_GP_REG); - if (otpgp & CSR_OTP_GP_REG_ECC_UNCORR_STATUS_MSK) { - /* stop in this case */ - IWL_ERR(priv, "Uncorrectable OTP ECC error, Abort OTP read\n"); + } + for (addr = validblockaddr; addr < validblockaddr + sz; + addr += sizeof(u16)) { + u16 eeprom_data; + + ret = iwl_read_otp_word(priv, addr, &eeprom_data); + if (ret) goto done; - } - if (otpgp & CSR_OTP_GP_REG_ECC_CORR_STATUS_MSK) { - /* continue in this case */ - _iwl_write32(priv, CSR_OTP_GP_REG, - iwl_read32(priv, CSR_OTP_GP_REG) | - CSR_OTP_GP_REG_ECC_CORR_STATUS_MSK); - IWL_ERR(priv, "Correctable OTP ECC error, continue read\n"); - } - 
e[addr / 2] = le16_to_cpu((__force __le16)(r >> 16)); + e[cache_addr / 2] = eeprom_data; + cache_addr += sizeof(u16); } } else { /* eeprom is an array of 16bit values */ --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-rx.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -239,33 +239,51 @@ struct iwl_rx_queue *rxq = &priv->rxq; struct list_head *element; struct iwl_rx_mem_buffer *rxb; + struct sk_buff *skb; unsigned long flags; while (1) { spin_lock_irqsave(&rxq->lock, flags); - if (list_empty(&rxq->rx_used)) { spin_unlock_irqrestore(&rxq->lock, flags); return; } - element = rxq->rx_used.next; - rxb = list_entry(element, struct iwl_rx_mem_buffer, list); - list_del(element); - spin_unlock_irqrestore(&rxq->lock, flags); + if (rxq->free_count > RX_LOW_WATERMARK) + priority |= __GFP_NOWARN; /* Alloc a new receive buffer */ - rxb->skb = alloc_skb(priv->hw_params.rx_buf_size + 256, + skb = alloc_skb(priv->hw_params.rx_buf_size + 256, priority); - if (!rxb->skb) { - IWL_CRIT(priv, "Can not allocate SKB buffers\n"); + if (!skb) { + if (net_ratelimit()) + IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n"); + if ((rxq->free_count <= RX_LOW_WATERMARK) && + net_ratelimit()) + IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n", + priority == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL", + rxq->free_count); /* We don't reschedule replenish work here -- we will * call the restock method and if it still needs * more buffers it will schedule replenish */ break; } + spin_lock_irqsave(&rxq->lock, flags); + + if (list_empty(&rxq->rx_used)) { + spin_unlock_irqrestore(&rxq->lock, flags); + dev_kfree_skb_any(skb); + return; + } + element = rxq->rx_used.next; + rxb = list_entry(element, struct iwl_rx_mem_buffer, list); + list_del(element); + + spin_unlock_irqrestore(&rxq->lock, flags); + + rxb->skb = skb; /* Get physical address of RB/SKB */ rxb->real_dma_addr = pci_map_single( priv->pci_dev, --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -1196,6 +1196,7 @@ struct iwl_rx_queue *rxq = &priv->rxq; struct list_head *element; struct iwl_rx_mem_buffer *rxb; + struct sk_buff *skb; unsigned long flags; while (1) { @@ -1205,25 +1206,39 @@ spin_unlock_irqrestore(&rxq->lock, flags); return; } - - element = rxq->rx_used.next; - rxb = list_entry(element, struct iwl_rx_mem_buffer, list); - list_del(element); spin_unlock_irqrestore(&rxq->lock, flags); + if (rxq->free_count > RX_LOW_WATERMARK) + priority |= __GFP_NOWARN; /* Alloc a new receive buffer */ - rxb->skb = - alloc_skb(priv->hw_params.rx_buf_size, - priority); - if (!rxb->skb) { + skb = alloc_skb(priv->hw_params.rx_buf_size, priority); + if (!skb) { if (net_ratelimit()) - IWL_CRIT(priv, ": Can not allocate SKB buffers\n"); + IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n"); + if ((rxq->free_count <= RX_LOW_WATERMARK) && + net_ratelimit()) + IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n", + priority == GFP_ATOMIC ? 
"GFP_ATOMIC" : "GFP_KERNEL", + rxq->free_count); /* We don't reschedule replenish work here -- we will * call the restock method and if it still needs * more buffers it will schedule replenish */ break; } + spin_lock_irqsave(&rxq->lock, flags); + if (list_empty(&rxq->rx_used)) { + spin_unlock_irqrestore(&rxq->lock, flags); + dev_kfree_skb_any(skb); + return; + } + element = rxq->rx_used.next; + rxb = list_entry(element, struct iwl_rx_mem_buffer, list); + list_del(element); + spin_unlock_irqrestore(&rxq->lock, flags); + + rxb->skb = skb; + /* If radiotap head is required, reserve some headroom here. * The physical head count is a variable rx_stats->phy_count. * We reserve 4 bytes here. Plus these extra bytes, the @@ -2111,7 +2126,7 @@ */ static int iwl3945_read_ucode(struct iwl_priv *priv) { - struct iwl_ucode *ucode; + const struct iwl_ucode_header *ucode; int ret = -EINVAL, index; const struct firmware *ucode_raw; /* firmware file name contains uCode/driver compatibility version */ @@ -2152,22 +2167,24 @@ goto error; /* Make sure that we got at least our header! */ - if (ucode_raw->size < sizeof(*ucode)) { + if (ucode_raw->size < priv->cfg->ops->ucode->get_header_size(1)) { IWL_ERR(priv, "File size way too small!\n"); ret = -EINVAL; goto err_release; } /* Data from ucode file: header followed by uCode images */ - ucode = (void *)ucode_raw->data; + ucode = (struct iwl_ucode_header *)ucode_raw->data; priv->ucode_ver = le32_to_cpu(ucode->ver); api_ver = IWL_UCODE_API(priv->ucode_ver); - inst_size = le32_to_cpu(ucode->inst_size); - data_size = le32_to_cpu(ucode->data_size); - init_size = le32_to_cpu(ucode->init_size); - init_data_size = le32_to_cpu(ucode->init_data_size); - boot_size = le32_to_cpu(ucode->boot_size); + inst_size = priv->cfg->ops->ucode->get_inst_size(ucode, api_ver); + data_size = priv->cfg->ops->ucode->get_data_size(ucode, api_ver); + init_size = priv->cfg->ops->ucode->get_init_size(ucode, api_ver); + init_data_size = + priv->cfg->ops->ucode->get_init_data_size(ucode, api_ver); + boot_size = priv->cfg->ops->ucode->get_boot_size(ucode, api_ver); + src = priv->cfg->ops->ucode->get_data(ucode, api_ver); /* api_ver should match the api version forming part of the * firmware filename ... but we don't check for that and only rely @@ -2208,12 +2225,13 @@ /* Verify size of file vs. image size info in file's header */ - if (ucode_raw->size < sizeof(*ucode) + + if (ucode_raw->size != priv->cfg->ops->ucode->get_header_size(api_ver) + inst_size + data_size + init_size + init_data_size + boot_size) { - IWL_DEBUG_INFO(priv, "uCode file size %zd too small\n", - ucode_raw->size); + IWL_DEBUG_INFO(priv, + "uCode file size %zd does not match expected size\n", + ucode_raw->size); ret = -EINVAL; goto err_release; } @@ -2296,44 +2314,44 @@ /* Copy images into buffers for card's bus-master reads ... 
*/ /* Runtime instructions (first block of data in file) */ - src = &ucode->data[0]; - len = priv->ucode_code.len; + len = inst_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) uCode instr len %zd\n", len); memcpy(priv->ucode_code.v_addr, src, len); + src += len; + IWL_DEBUG_INFO(priv, "uCode instr buf vaddr = 0x%p, paddr = 0x%08x\n", priv->ucode_code.v_addr, (u32)priv->ucode_code.p_addr); /* Runtime data (2nd block) * NOTE: Copy into backup buffer will be done in iwl3945_up() */ - src = &ucode->data[inst_size]; - len = priv->ucode_data.len; + len = data_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) uCode data len %zd\n", len); memcpy(priv->ucode_data.v_addr, src, len); memcpy(priv->ucode_data_backup.v_addr, src, len); + src += len; /* Initialization instructions (3rd block) */ if (init_size) { - src = &ucode->data[inst_size + data_size]; - len = priv->ucode_init.len; + len = init_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) init instr len %zd\n", len); memcpy(priv->ucode_init.v_addr, src, len); + src += len; } /* Initialization data (4th block) */ if (init_data_size) { - src = &ucode->data[inst_size + data_size + init_size]; - len = priv->ucode_init_data.len; + len = init_data_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) init data len %zd\n", len); memcpy(priv->ucode_init_data.v_addr, src, len); + src += len; } /* Bootstrap instructions (5th block) */ - src = &ucode->data[inst_size + data_size + init_size + init_data_size]; - len = priv->ucode_boot.len; + len = boot_size; IWL_DEBUG_INFO(priv, "Copying (but not loading) boot instr len %zd\n", len); memcpy(priv->ucode_boot.v_addr, src, len); --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-core.h +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-core.h @@ -116,6 +116,17 @@ void (*set_ct_kill)(struct iwl_priv *priv); }; +struct iwl_ucode_ops { + u32 (*get_header_size)(u32); + u32 (*get_build)(const struct iwl_ucode_header *, u32); + u32 (*get_inst_size)(const struct iwl_ucode_header *, u32); + u32 (*get_data_size)(const struct iwl_ucode_header *, u32); + u32 (*get_init_size)(const struct iwl_ucode_header *, u32); + u32 (*get_init_data_size)(const struct iwl_ucode_header *, u32); + u32 (*get_boot_size)(const struct iwl_ucode_header *, u32); + u8 * (*get_data)(const struct iwl_ucode_header *, u32); +}; + struct iwl_lib_ops { /* set hw dependent parameters */ int (*set_hw_params)(struct iwl_priv *priv); @@ -171,6 +182,7 @@ }; struct iwl_ops { + const struct iwl_ucode_ops *ucode; const struct iwl_lib_ops *lib; const struct iwl_hcmd_ops *hcmd; const struct iwl_hcmd_utils_ops *utils; @@ -195,6 +207,9 @@ * filename is constructed as fw_name_pre.ucode. * @ucode_api_max: Highest version of uCode API supported by driver. * @ucode_api_min: Lowest version of uCode API supported by driver. + * @max_ll_items: max number of OTP blocks + * @shadow_ram_support: shadow support for OTP memory + * @use_rts_for_ht: use rts/cts protection for HT traffic * * We enable the driver to be backward compatible wrt API version. 
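
A compact illustration of the compatibility range this kernel-doc describes: the driver advertises a supported span of firmware API versions, rejects firmware outside it, and picks the header layout by API version. The numbers mirror UCODE_HEADER_SIZE() and the API_MIN/API_MAX defines in this series (v1 header = 24 bytes, v2 adds a 4-byte build number); the function names are invented.

    #include <stdio.h>

    #define API_MIN 1
    #define API_MAX 3

    static int header_size(unsigned api_ver)
    {
        return api_ver <= 2 ? 24 : 28;  /* v2 layout starts at API 3 */
    }

    static int check_api(unsigned api_ver)
    {
        if (api_ver < API_MIN || api_ver > API_MAX)
            return -1;                  /* refuse unknown firmware */
        return header_size(api_ver);
    }

    int main(void)
    {
        printf("%d\n", check_api(2));   /* prints 24 */
        printf("%d\n", check_api(3));   /* prints 28 */
        return 0;
    }
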
The * driver specifies which APIs it supports (with @ucode_api_max being the @@ -231,6 +246,9 @@ u8 valid_rx_ant; bool need_pll_cfg; bool use_isr_legacy; + const u16 max_ll_items; + const bool shadow_ram_support; + bool use_rts_for_ht; }; /*************************** --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-6000.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-6000.c @@ -46,8 +46,8 @@ #include "iwl-5000-hw.h" /* Highest firmware API version supported */ -#define IWL6000_UCODE_API_MAX 2 -#define IWL6050_UCODE_API_MAX 2 +#define IWL6000_UCODE_API_MAX 3 +#define IWL6050_UCODE_API_MAX 3 /* Lowest firmware API version supported */ #define IWL6000_UCODE_API_MIN 1 @@ -69,6 +69,7 @@ }; static struct iwl_ops iwl6000_ops = { + .ucode = &iwl5000_ucode, .lib = &iwl5000_lib, .hcmd = &iwl5000_hcmd, .utils = &iwl6000_hcmd_utils, @@ -81,13 +82,15 @@ .ucode_api_min = IWL6000_UCODE_API_MIN, .sku = IWL_SKU_A|IWL_SKU_G, .ops = &iwl6000_ops, - .eeprom_size = IWL_5000_EEPROM_IMG_SIZE, + .eeprom_size = OTP_LOW_IMAGE_SIZE, .eeprom_ver = EEPROM_5000_EEPROM_VERSION, .eeprom_calib_ver = EEPROM_5000_TX_POWER_VERSION, .mod_params = &iwl50_mod_params, .valid_tx_ant = ANT_BC, .valid_rx_ant = ANT_BC, .need_pll_cfg = false, + .max_ll_items = OTP_MAX_LL_ITEMS_6x00, + .shadow_ram_support = true, }; struct iwl_cfg iwl6000_2agn_cfg = { @@ -97,13 +100,16 @@ .ucode_api_min = IWL6000_UCODE_API_MIN, .sku = IWL_SKU_A|IWL_SKU_G|IWL_SKU_N, .ops = &iwl6000_ops, - .eeprom_size = IWL_5000_EEPROM_IMG_SIZE, + .eeprom_size = OTP_LOW_IMAGE_SIZE, .eeprom_ver = EEPROM_5000_EEPROM_VERSION, .eeprom_calib_ver = EEPROM_5000_TX_POWER_VERSION, .mod_params = &iwl50_mod_params, .valid_tx_ant = ANT_AB, .valid_rx_ant = ANT_AB, .need_pll_cfg = false, + .max_ll_items = OTP_MAX_LL_ITEMS_6x00, + .shadow_ram_support = true, + .use_rts_for_ht = true, /* use rts/cts protection */ }; struct iwl_cfg iwl6050_2agn_cfg = { @@ -113,13 +119,16 @@ .ucode_api_min = IWL6050_UCODE_API_MIN, .sku = IWL_SKU_A|IWL_SKU_G|IWL_SKU_N, .ops = &iwl6000_ops, - .eeprom_size = IWL_5000_EEPROM_IMG_SIZE, + .eeprom_size = OTP_LOW_IMAGE_SIZE, .eeprom_ver = EEPROM_5000_EEPROM_VERSION, .eeprom_calib_ver = EEPROM_5000_TX_POWER_VERSION, .mod_params = &iwl50_mod_params, .valid_tx_ant = ANT_AB, .valid_rx_ant = ANT_AB, .need_pll_cfg = false, + .max_ll_items = OTP_MAX_LL_ITEMS_6x00, + .shadow_ram_support = true, + .use_rts_for_ht = true, /* use rts/cts protection */ }; struct iwl_cfg iwl6000_3agn_cfg = { @@ -129,13 +138,16 @@ .ucode_api_min = IWL6000_UCODE_API_MIN, .sku = IWL_SKU_A|IWL_SKU_G|IWL_SKU_N, .ops = &iwl6000_ops, - .eeprom_size = IWL_5000_EEPROM_IMG_SIZE, + .eeprom_size = OTP_LOW_IMAGE_SIZE, .eeprom_ver = EEPROM_5000_EEPROM_VERSION, .eeprom_calib_ver = EEPROM_5000_TX_POWER_VERSION, .mod_params = &iwl50_mod_params, .valid_tx_ant = ANT_ABC, .valid_rx_ant = ANT_ABC, .need_pll_cfg = false, + .max_ll_items = OTP_MAX_LL_ITEMS_6x00, + .shadow_ram_support = true, + .use_rts_for_ht = true, /* use rts/cts protection */ }; struct iwl_cfg iwl6050_3agn_cfg = { @@ -145,13 +157,16 @@ .ucode_api_min = IWL6050_UCODE_API_MIN, .sku = IWL_SKU_A|IWL_SKU_G|IWL_SKU_N, .ops = &iwl6000_ops, - .eeprom_size = IWL_5000_EEPROM_IMG_SIZE, + .eeprom_size = OTP_LOW_IMAGE_SIZE, .eeprom_ver = EEPROM_5000_EEPROM_VERSION, .eeprom_calib_ver = EEPROM_5000_TX_POWER_VERSION, .mod_params = &iwl50_mod_params, .valid_tx_ant = ANT_ABC, .valid_rx_ant = ANT_ABC, .need_pll_cfg = false, + .max_ll_items = OTP_MAX_LL_ITEMS_6x00, + .shadow_ram_support = true, + .use_rts_for_ht = true, /* use rts/cts protection 
*/ }; MODULE_FIRMWARE(IWL6000_MODULE_FIRMWARE(IWL6000_UCODE_API_MAX)); --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-4965.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -2221,12 +2221,50 @@ cancel_work_sync(&priv->txpower_work); } +#define IWL4965_UCODE_GET(item) \ +static u32 iwl4965_ucode_get_##item(const struct iwl_ucode_header *ucode,\ + u32 api_ver) \ +{ \ + return le32_to_cpu(ucode->u.v1.item); \ +} + +static u32 iwl4965_ucode_get_header_size(u32 api_ver) +{ + return UCODE_HEADER_SIZE(1); +} +static u32 iwl4965_ucode_get_build(const struct iwl_ucode_header *ucode, + u32 api_ver) +{ + return 0; +} +static u8 *iwl4965_ucode_get_data(const struct iwl_ucode_header *ucode, + u32 api_ver) +{ + return (u8 *) ucode->u.v1.data; +} + +IWL4965_UCODE_GET(inst_size); +IWL4965_UCODE_GET(data_size); +IWL4965_UCODE_GET(init_size); +IWL4965_UCODE_GET(init_data_size); +IWL4965_UCODE_GET(boot_size); + static struct iwl_hcmd_ops iwl4965_hcmd = { .rxon_assoc = iwl4965_send_rxon_assoc, .commit_rxon = iwl_commit_rxon, .set_rxon_chain = iwl_set_rxon_chain, }; +static struct iwl_ucode_ops iwl4965_ucode = { + .get_header_size = iwl4965_ucode_get_header_size, + .get_build = iwl4965_ucode_get_build, + .get_inst_size = iwl4965_ucode_get_inst_size, + .get_data_size = iwl4965_ucode_get_data_size, + .get_init_size = iwl4965_ucode_get_init_size, + .get_init_data_size = iwl4965_ucode_get_init_data_size, + .get_boot_size = iwl4965_ucode_get_boot_size, + .get_data = iwl4965_ucode_get_data, +}; static struct iwl_hcmd_utils_ops iwl4965_hcmd_utils = { .get_hcmd_size = iwl4965_get_hcmd_size, .build_addsta_hcmd = iwl4965_build_addsta_hcmd, @@ -2287,6 +2325,7 @@ }; static struct iwl_ops iwl4965_ops = { + .ucode = &iwl4965_ucode, .lib = &iwl4965_lib, .hcmd = &iwl4965_hcmd, .utils = &iwl4965_hcmd_utils, --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-1000.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-1000.c @@ -46,7 +46,7 @@ #include "iwl-5000-hw.h" /* Highest firmware API version supported */ -#define IWL1000_UCODE_API_MAX 2 +#define IWL1000_UCODE_API_MAX 3 /* Lowest firmware API version supported */ #define IWL1000_UCODE_API_MIN 1 @@ -62,12 +62,15 @@ .ucode_api_min = IWL1000_UCODE_API_MIN, .sku = IWL_SKU_G|IWL_SKU_N, .ops = &iwl5000_ops, - .eeprom_size = IWL_5000_EEPROM_IMG_SIZE, + .eeprom_size = OTP_LOW_IMAGE_SIZE, .eeprom_ver = EEPROM_5000_EEPROM_VERSION, .eeprom_calib_ver = EEPROM_5000_TX_POWER_VERSION, .mod_params = &iwl50_mod_params, .valid_tx_ant = ANT_A, .valid_rx_ant = ANT_AB, .need_pll_cfg = true, + .max_ll_items = OTP_MAX_LL_ITEMS_1000, + .shadow_ram_support = false, + .use_rts_for_ht = true, /* use rts/cts protection */ }; --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-scan.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -799,7 +799,8 @@ { struct iwl_priv *priv = container_of(work, struct iwl_priv, abort_scan); - if (!iwl_is_ready(priv)) + if (!test_bit(STATUS_READY, &priv->status) || + !test_bit(STATUS_GEO_CONFIGURED, &priv->status)) return; mutex_lock(&priv->mutex); --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-tx.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -1233,8 +1233,16 @@ return -ENXIO; } + if (priv->stations[sta_id].tid[tid].agg.state == + IWL_EMPTYING_HW_QUEUE_ADDBA) { + IWL_DEBUG_HT(priv, "AGG stop before setup done\n"); + ieee80211_stop_tx_ba_cb_irqsafe(priv->hw, ra, tid); + priv->stations[sta_id].tid[tid].agg.state = IWL_AGG_OFF; + return 0; + } + if 
(priv->stations[sta_id].tid[tid].agg.state != IWL_AGG_ON) - IWL_WARN(priv, "Stopping AGG while state not IWL_AGG_ON\n"); + IWL_WARN(priv, "Stopping AGG while state not ON or starting\n"); tid_data = &priv->stations[sta_id].tid[tid]; ssn = (tid_data->seq_number & IEEE80211_SCTL_SEQ) >> 4; --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-3945.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -2784,11 +2784,50 @@ return 0; } +#define IWL3945_UCODE_GET(item) \ +static u32 iwl3945_ucode_get_##item(const struct iwl_ucode_header *ucode,\ + u32 api_ver) \ +{ \ + return le32_to_cpu(ucode->u.v1.item); \ +} + +static u32 iwl3945_ucode_get_header_size(u32 api_ver) +{ + return UCODE_HEADER_SIZE(1); +} +static u32 iwl3945_ucode_get_build(const struct iwl_ucode_header *ucode, + u32 api_ver) +{ + return 0; +} +static u8 *iwl3945_ucode_get_data(const struct iwl_ucode_header *ucode, + u32 api_ver) +{ + return (u8 *) ucode->u.v1.data; +} + +IWL3945_UCODE_GET(inst_size); +IWL3945_UCODE_GET(data_size); +IWL3945_UCODE_GET(init_size); +IWL3945_UCODE_GET(init_data_size); +IWL3945_UCODE_GET(boot_size); + static struct iwl_hcmd_ops iwl3945_hcmd = { .rxon_assoc = iwl3945_send_rxon_assoc, .commit_rxon = iwl3945_commit_rxon, }; +static struct iwl_ucode_ops iwl3945_ucode = { + .get_header_size = iwl3945_ucode_get_header_size, + .get_build = iwl3945_ucode_get_build, + .get_inst_size = iwl3945_ucode_get_inst_size, + .get_data_size = iwl3945_ucode_get_data_size, + .get_init_size = iwl3945_ucode_get_init_size, + .get_init_data_size = iwl3945_ucode_get_init_data_size, + .get_boot_size = iwl3945_ucode_get_boot_size, + .get_data = iwl3945_ucode_get_data, +}; + static struct iwl_lib_ops iwl3945_lib = { .txq_attach_buf_to_tfd = iwl3945_hw_txq_attach_buf_to_tfd, .txq_free_tfd = iwl3945_hw_txq_free_tfd, @@ -2829,6 +2868,7 @@ }; static struct iwl_ops iwl3945_ops = { + .ucode = &iwl3945_ucode, .lib = &iwl3945_lib, .hcmd = &iwl3945_hcmd, .utils = &iwl3945_hcmd_utils, --- linux-ec2-2.6.31.orig/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ linux-ec2-2.6.31/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -415,6 +415,15 @@ else if (tid == IWL_AGG_ALL_TID) for (tid = 0; tid < TID_MAX_LOAD_COUNT; tid++) rs_tl_turn_on_agg_for_tid(priv, lq_data, tid, sta); + if (priv->cfg->use_rts_for_ht) { + /* + * switch to RTS/CTS if it is the preferred protection method + * for HT traffic + */ + IWL_DEBUG_HT(priv, "use RTS/CTS protection for HT\n"); + priv->staging_rxon.flags &= ~RXON_FLG_SELF_CTS_EN; + iwlcore_commit_rxon(priv); + } } static inline int get_num_of_ant_from_rate(u32 rate_n_flags) --- linux-ec2-2.6.31.orig/drivers/net/wireless/hostap/hostap_main.c +++ linux-ec2-2.6.31/drivers/net/wireless/hostap/hostap_main.c @@ -875,15 +875,16 @@ switch(type) { case HOSTAP_INTERFACE_AP: + dev->tx_queue_len = 0; /* use main radio device queue */ dev->netdev_ops = &hostap_mgmt_netdev_ops; dev->type = ARPHRD_IEEE80211; dev->header_ops = &hostap_80211_ops; break; case HOSTAP_INTERFACE_MASTER: - dev->tx_queue_len = 0; /* use main radio device queue */ dev->netdev_ops = &hostap_master_ops; break; default: + dev->tx_queue_len = 0; /* use main radio device queue */ dev->netdev_ops = &hostap_netdev_ops; } @@ -1099,6 +1100,7 @@ (u8 *) &val, 2); memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); wireless_send_event(local->dev, SIOCGIWAP, &wrqu, NULL); + wireless_send_event(local->ddev, SIOCGIWAP, &wrqu, NULL); return ret; } --- linux-ec2-2.6.31.orig/drivers/net/wireless/hostap/hostap_hw.c +++ linux-ec2-2.6.31/drivers/net/wireless/hostap/hostap_hw.c @@ 
-68,7 +68,7 @@ module_param_string(essid, essid, sizeof(essid), 0444); MODULE_PARM_DESC(essid, "Host AP's ESSID"); -static int iw_mode[MAX_PARM_DEVICES] = { IW_MODE_MASTER, DEF_INTS }; +static int iw_mode[MAX_PARM_DEVICES] = { IW_MODE_INFRA, DEF_INTS }; module_param_array(iw_mode, int, NULL, 0444); MODULE_PARM_DESC(iw_mode, "Initial operation mode"); @@ -2618,6 +2618,15 @@ int events = 0; u16 ev; + /* Detect early interrupt before driver is fully configured */ + if (!dev->base_addr) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: Interrupt, but dev not configured\n", + dev->name); + } + return IRQ_HANDLED; + } + iface = netdev_priv(dev); local = iface->local; @@ -3383,6 +3392,7 @@ memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(local->dev, SIOCGIWAP, &wrqu, NULL); + wireless_send_event(local->ddev, SIOCGIWAP, &wrqu, NULL); /* Disable hardware and firmware */ prism2_hw_shutdown(dev, 0); --- linux-ec2-2.6.31.orig/drivers/net/wireless/hostap/hostap_info.c +++ linux-ec2-2.6.31/drivers/net/wireless/hostap/hostap_info.c @@ -237,6 +237,7 @@ wrqu.data.length = 0; wrqu.data.flags = 0; wireless_send_event(local->dev, SIOCGIWSCAN, &wrqu, NULL); + wireless_send_event(local->ddev, SIOCGIWSCAN, &wrqu, NULL); /* Allow SIOCGIWSCAN handling to occur since we have received * scanning result */ @@ -448,8 +449,10 @@ * frames and can confuse wpa_supplicant about the current association * status. */ - if (connected || local->prev_linkstatus_connected) + if (connected || local->prev_linkstatus_connected) { wireless_send_event(local->dev, SIOCGIWAP, &wrqu, NULL); + wireless_send_event(local->ddev, SIOCGIWAP, &wrqu, NULL); + } local->prev_linkstatus_connected = connected; } --- linux-ec2-2.6.31.orig/drivers/net/wireless/libertas/wext.c +++ linux-ec2-2.6.31/drivers/net/wireless/libertas/wext.c @@ -1951,10 +1951,8 @@ if (priv->connect_status == LBS_CONNECTED) { memcpy(extra, priv->curbssparams.ssid, priv->curbssparams.ssid_len); - extra[priv->curbssparams.ssid_len] = '\0'; } else { memset(extra, 0, 32); - extra[priv->curbssparams.ssid_len] = '\0'; } /* * If none, we may want to get the one that was set --- linux-ec2-2.6.31.orig/drivers/net/wireless/libertas/if_usb.c +++ linux-ec2-2.6.31/drivers/net/wireless/libertas/if_usb.c @@ -507,7 +507,7 @@ /* Fill the receive configuration URB and initialise the Rx call back */ usb_fill_bulk_urb(cardp->rx_urb, cardp->udev, usb_rcvbulkpipe(cardp->udev, cardp->ep_in), - (void *) (skb->tail + (size_t) IPFIELD_ALIGN_OFFSET), + skb->data + IPFIELD_ALIGN_OFFSET, MRVDRV_ETH_RX_PACKET_BUFFER_SIZE, callbackfn, cardp); --- linux-ec2-2.6.31.orig/drivers/net/wireless/b43/dma.c +++ linux-ec2-2.6.31/drivers/net/wireless/b43/dma.c @@ -1158,8 +1158,9 @@ } static int dma_tx_fragment(struct b43_dmaring *ring, - struct sk_buff *skb) + struct sk_buff **in_skb) { + struct sk_buff *skb = *in_skb; const struct b43_dma_ops *ops = ring->ops; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u8 *header; @@ -1225,8 +1226,14 @@ } memcpy(skb_put(bounce_skb, skb->len), skb->data, skb->len); + memcpy(bounce_skb->cb, skb->cb, sizeof(skb->cb)); + bounce_skb->dev = skb->dev; + skb_set_queue_mapping(bounce_skb, skb_get_queue_mapping(skb)); + info = IEEE80211_SKB_CB(bounce_skb); + dev_kfree_skb_any(skb); skb = bounce_skb; + *in_skb = bounce_skb; meta->skb = skb; meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1); if (b43_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) { @@ -1334,13 +1341,22 @@ spin_lock_irqsave(&ring->lock, flags); 
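
dma_tx_fragment() now takes struct sk_buff ** precisely because the bounce path frees the original skb: the replacement buffer must inherit the per-packet metadata (cb, dev, queue mapping) and the caller's pointer must be redirected, or the caller would keep touching freed memory. A stand-alone sketch of that idiom, with an invented struct pkt standing in for sk_buff:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct pkt {
        char cb[8];      /* control metadata, like skb->cb */
        int queue;       /* like the skb queue mapping */
        char data[32];
    };

    /* Replace *in with a fresh copy, preserving metadata. */
    static int bounce(struct pkt **in)
    {
        struct pkt *old = *in, *new = malloc(sizeof(*new));

        if (!new)
            return -1;
        memcpy(new->data, old->data, sizeof(new->data));
        memcpy(new->cb, old->cb, sizeof(new->cb)); /* keep metadata */
        new->queue = old->queue;
        free(old);
        *in = new;       /* caller's pointer now tracks the copy */
        return 0;
    }

    int main(void)
    {
        struct pkt *p = malloc(sizeof(*p));

        if (!p)
            return 1;
        p->queue = 2;
        strcpy(p->cb, "meta");
        strcpy(p->data, "payload");
        if (bounce(&p))
            return 1;
        printf("%s %d %s\n", p->cb, p->queue, p->data);
        free(p);
        return 0;
    }
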
B43_WARN_ON(!ring->tx); - /* Check if the queue was stopped in mac80211, - * but we got called nevertheless. - * That would be a mac80211 bug. */ - B43_WARN_ON(ring->stopped); - if (unlikely(free_slots(ring) < TX_SLOTS_PER_FRAME)) { - b43warn(dev->wl, "DMA queue overflow\n"); + if (unlikely(ring->stopped)) { + /* We get here only because of a bug in mac80211. + * Because of a race, one packet may be queued after + * the queue is stopped, thus we got called when we shouldn't. + * For now, just refuse the transmit. */ + if (b43_debug(dev, B43_DBG_DMAVERBOSE)) + b43err(dev->wl, "Packet after queue stopped\n"); + err = -ENOSPC; + goto out_unlock; + } + + if (unlikely(WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME))) { + /* If we get here, we have a real error with the queue + * full, but queues not stopped. */ + b43err(dev->wl, "DMA queue overflow\n"); err = -ENOSPC; goto out_unlock; } @@ -1350,7 +1366,11 @@ * static, so we don't need to store it per frame. */ ring->queue_prio = skb_get_queue_mapping(skb); - err = dma_tx_fragment(ring, skb); + /* dma_tx_fragment might reallocate the skb, so invalidate pointers pointing + * into the skb data or cb now. */ + hdr = NULL; + info = NULL; + err = dma_tx_fragment(ring, &skb); if (unlikely(err == -ENOKEY)) { /* Drop this packet, as we don't have the encryption key * anymore and must not transmit it unencrypted. */ --- linux-ec2-2.6.31.orig/drivers/net/wireless/b43/rfkill.c +++ linux-ec2-2.6.31/drivers/net/wireless/b43/rfkill.c @@ -33,7 +33,8 @@ & B43_MMIO_RADIO_HWENABLED_HI_MASK)) return 1; } else { - if (b43_read16(dev, B43_MMIO_RADIO_HWENABLED_LO) + if (b43_status(dev) >= B43_STAT_STARTED && + b43_read16(dev, B43_MMIO_RADIO_HWENABLED_LO) & B43_MMIO_RADIO_HWENABLED_LO_MASK) return 1; } --- linux-ec2-2.6.31.orig/drivers/net/wireless/b43legacy/rfkill.c +++ linux-ec2-2.6.31/drivers/net/wireless/b43legacy/rfkill.c @@ -34,6 +34,13 @@ & B43legacy_MMIO_RADIO_HWENABLED_HI_MASK)) return 1; } else { + /* To prevent CPU fault on PPC, do not read a register + * unless the interface is started; however, on resume + * for hibernation, this routine is entered early. When + * that happens, unconditionally return TRUE. + */ + if (b43legacy_status(dev) < B43legacy_STAT_STARTED) + return 1; if (b43legacy_read16(dev, B43legacy_MMIO_RADIO_HWENABLED_LO) & B43legacy_MMIO_RADIO_HWENABLED_LO_MASK) return 1; --- linux-ec2-2.6.31.orig/drivers/net/wireless/rtl818x/rtl8187_leds.c +++ linux-ec2-2.6.31/drivers/net/wireless/rtl818x/rtl8187_leds.c @@ -210,10 +210,10 @@ /* turn the LED off before exiting */ queue_delayed_work(dev->workqueue, &priv->led_off, 0); - cancel_delayed_work_sync(&priv->led_off); - cancel_delayed_work_sync(&priv->led_on); rtl8187_unregister_led(&priv->led_rx); rtl8187_unregister_led(&priv->led_tx); + cancel_delayed_work_sync(&priv->led_off); + cancel_delayed_work_sync(&priv->led_on); } #endif /* def CONFIG_RTL8187_LED */ --- linux-ec2-2.6.31.orig/drivers/net/wireless/p54/p54usb.c +++ linux-ec2-2.6.31/drivers/net/wireless/p54/p54usb.c @@ -64,8 +64,9 @@ {USB_DEVICE(0x0915, 0x2002)}, /* Cohiba Proto board */ {USB_DEVICE(0x0baf, 0x0118)}, /* U.S. 
Robotics U5 802.11g Adapter*/ {USB_DEVICE(0x0bf8, 0x1009)}, /* FUJITSU E-5400 USB D1700*/ - {USB_DEVICE(0x0cde, 0x0006)}, /* Medion MD40900 */ +// DUPE {USB_DEVICE(0x0cde, 0x0006)}, /* Medion MD40900 */ {USB_DEVICE(0x0cde, 0x0008)}, /* Sagem XG703A */ + {USB_DEVICE(0x0cde, 0x0015)}, /* Zcomax XG-705A */ {USB_DEVICE(0x0d8e, 0x3762)}, /* DLink DWL-G120 Cohiba */ {USB_DEVICE(0x124a, 0x4025)}, /* IOGear GWU513 (GW3887IK chip) */ {USB_DEVICE(0x1260, 0xee22)}, /* SMC 2862W-G version 2 */ @@ -426,12 +427,16 @@ static int p54u_firmware_reset_3887(struct ieee80211_hw *dev) { struct p54u_priv *priv = dev->priv; - u8 buf[4]; + u8 *buf; int ret; - memcpy(&buf, p54u_romboot_3887, sizeof(buf)); + buf = kmalloc(4, GFP_KERNEL); + if (!buf) + return -ENOMEM; + memcpy(buf, p54u_romboot_3887, 4); ret = p54u_bulk_msg(priv, P54U_PIPE_DATA, - buf, sizeof(buf)); + buf, 4); + kfree(buf); if (ret) dev_err(&priv->udev->dev, "(p54usb) unable to jump to " "boot ROM (%d)!\n", ret); --- linux-ec2-2.6.31.orig/drivers/net/can/vcan.c +++ linux-ec2-2.6.31/drivers/net/can/vcan.c @@ -80,7 +80,7 @@ skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; - netif_rx(skb); + netif_rx_ni(skb); } static int vcan_tx(struct sk_buff *skb, struct net_device *dev) --- linux-ec2-2.6.31.orig/drivers/net/tulip/tulip.h +++ linux-ec2-2.6.31/drivers/net/tulip/tulip.h @@ -38,7 +38,10 @@ #define TULIP_BAR 0 /* CBIO */ #endif - +#ifndef PCI_ULI5261_ID +#define PCI_ULI5261_ID 0x526110B9 /* ULi M5261 ID*/ +#define PCI_ULI5263_ID 0x526310B9 /* ULi M5263 ID*/ +#endif struct tulip_chip_table { char *chip_name; --- linux-ec2-2.6.31.orig/drivers/net/tulip/tulip_core.c +++ linux-ec2-2.6.31/drivers/net/tulip/tulip_core.c @@ -228,8 +228,12 @@ { 0x1259, 0xa120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, { 0x11F6, 0x9881, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMPEX9881 }, { 0x8086, 0x0039, PCI_ANY_ID, PCI_ANY_ID, 0, 0, I21145 }, + /* Ubuntu: On non-sparc, this seems to be handled better by the + * dmfe driver. */ +#ifdef __sparc__ { 0x1282, 0x9100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, { 0x1282, 0x9102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, +#endif { 0x1113, 0x1216, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, { 0x1113, 0x1217, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 }, { 0x1113, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, @@ -393,6 +397,11 @@ goto media_picked; } } + if (tp->chip_id == PCI_ULI5261_ID || tp->chip_id == PCI_ULI5263_ID) { + for (i = tp->mtable->leafcount - 1; i >= 0; i--) + if (tulip_media_cap[tp->mtable->mleaf[i].media] & MediaIsMII) + goto media_picked; + } /* Start sensing first non-full-duplex media. 
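
The ULi workaround above is simply a reverse scan of the media table for an MII-capable leaf, preferring the highest-indexed one. In miniature, with an invented capability flag:

    #include <stdio.h>

    #define CAP_MII 0x1

    int main(void)
    {
        unsigned caps[5] = { 0, CAP_MII, 0, CAP_MII, 0 }; /* leaf caps */
        int pick = -1;

        for (int i = 4; i >= 0; i--)      /* scan backwards, last MII wins */
            if (caps[i] & CAP_MII) {
                pick = i;
                break;
            }
        printf("picked leaf %d\n", pick); /* prints 3 */
        return 0;
    }
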
*/ for (i = tp->mtable->leafcount - 1; (tulip_media_cap[tp->mtable->mleaf[i].media] & MediaAlwaysFD) && i > 0; i--) --- linux-ec2-2.6.31.orig/drivers/net/e1000e/82571.c +++ linux-ec2-2.6.31/drivers/net/e1000e/82571.c @@ -1803,7 +1803,7 @@ | FLAG_HAS_AMT | FLAG_HAS_CTRLEXT_ON_LOAD, .pba = 20, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = DEFAULT_JUMBO, .get_variants = e1000_get_variants_82571, .mac_ops = &e82571_mac_ops, .phy_ops = &e82_phy_ops_bm, @@ -1820,7 +1820,7 @@ | FLAG_HAS_AMT | FLAG_HAS_CTRLEXT_ON_LOAD, .pba = 20, - .max_hw_frame_size = DEFAULT_JUMBO, + .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_82571, .mac_ops = &e82571_mac_ops, .phy_ops = &e82_phy_ops_bm, --- linux-ec2-2.6.31.orig/drivers/net/e1000e/netdev.c +++ linux-ec2-2.6.31/drivers/net/e1000e/netdev.c @@ -482,14 +482,24 @@ length = le16_to_cpu(rx_desc->length); - /* !EOP means multiple descriptors were used to store a single - * packet, also make sure the frame isn't just CRC only */ - if (!(status & E1000_RXD_STAT_EOP) || (length <= 4)) { + /* + * !EOP means multiple descriptors were used to store a single + * packet, if that's the case we need to toss it. In fact, we + * need to toss every packet with the EOP bit clear and the + * next frame that _does_ have the EOP bit set, as it is by + * definition only a frame fragment + */ + if (unlikely(!(status & E1000_RXD_STAT_EOP))) + adapter->flags2 |= FLAG2_IS_DISCARDING; + + if (adapter->flags2 & FLAG2_IS_DISCARDING) { /* All receives must fit into a single buffer */ e_dbg("%s: Receive packet consumed multiple buffers\n", netdev->name); /* recycle */ buffer_info->skb = skb; + if (status & E1000_RXD_STAT_EOP) + adapter->flags2 &= ~FLAG2_IS_DISCARDING; goto next_desc; } @@ -747,10 +757,16 @@ PCI_DMA_FROMDEVICE); buffer_info->dma = 0; - if (!(staterr & E1000_RXD_STAT_EOP)) { + /* see !EOP comment in other rx routine */ + if (!(staterr & E1000_RXD_STAT_EOP)) + adapter->flags2 |= FLAG2_IS_DISCARDING; + + if (adapter->flags2 & FLAG2_IS_DISCARDING) { e_dbg("%s: Packet Split buffers didn't pick up the " "full packet\n", netdev->name); dev_kfree_skb_irq(skb); + if (staterr & E1000_RXD_STAT_EOP) + adapter->flags2 &= ~FLAG2_IS_DISCARDING; goto next_desc; } @@ -1120,6 +1136,7 @@ rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + adapter->flags2 &= ~FLAG2_IS_DISCARDING; writel(0, adapter->hw.hw_addr + rx_ring->head); writel(0, adapter->hw.hw_addr + rx_ring->tail); @@ -2330,18 +2347,6 @@ rctl &= ~E1000_RCTL_SZ_4096; rctl |= E1000_RCTL_BSEX; switch (adapter->rx_buffer_len) { - case 256: - rctl |= E1000_RCTL_SZ_256; - rctl &= ~E1000_RCTL_BSEX; - break; - case 512: - rctl |= E1000_RCTL_SZ_512; - rctl &= ~E1000_RCTL_BSEX; - break; - case 1024: - rctl |= E1000_RCTL_SZ_1024; - rctl &= ~E1000_RCTL_BSEX; - break; case 2048: default: rctl |= E1000_RCTL_SZ_2048; @@ -4301,13 +4306,7 @@ * fragmented skbs */ - if (max_frame <= 256) - adapter->rx_buffer_len = 256; - else if (max_frame <= 512) - adapter->rx_buffer_len = 512; - else if (max_frame <= 1024) - adapter->rx_buffer_len = 1024; - else if (max_frame <= 2048) + if (max_frame <= 2048) adapter->rx_buffer_len = 2048; else adapter->rx_buffer_len = 4096; @@ -4985,7 +4984,7 @@ /* AER (Advanced Error Reporting) hooks */ err = pci_enable_pcie_error_reporting(pdev); if (err) { - dev_err(&pdev->dev, "pci_enable_pcie_error_reporting failed " + dev_notice(&pdev->dev, "pci_enable_pcie_error_reporting failed " "0x%x\n", err); /* non-fatal, continue */ } @@ -5365,6 +5364,7 @@ { 
PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_AMT), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V), board_ich9lan }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V_2), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LM), board_ich9lan }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LF), board_ich9lan }, --- linux-ec2-2.6.31.orig/drivers/net/e1000e/e1000.h +++ linux-ec2-2.6.31/drivers/net/e1000e/e1000.h @@ -401,6 +401,7 @@ /* CRC Stripping defines */ #define FLAG2_CRC_STRIPPING (1 << 0) #define FLAG2_HAS_PHY_WAKEUP (1 << 1) +#define FLAG2_IS_DISCARDING (1 << 2) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) --- linux-ec2-2.6.31.orig/drivers/net/e1000e/hw.h +++ linux-ec2-2.6.31/drivers/net/e1000e/hw.h @@ -368,6 +368,7 @@ #define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 #define E1000_DEV_ID_ICH9_IGP_M 0x10BF #define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB +#define E1000_DEV_ID_ICH9_IGP_M_V_2 0x10BE #define E1000_DEV_ID_ICH9_IGP_C 0x294C #define E1000_DEV_ID_ICH9_IFE 0x10C0 #define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 --- linux-ec2-2.6.31.orig/drivers/net/appletalk/ipddp.c +++ linux-ec2-2.6.31/drivers/net/appletalk/ipddp.c @@ -176,8 +176,7 @@ dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - if(aarp_send_ddp(rt->dev, skb, &rt->at, NULL) < 0) - dev_kfree_skb(skb); + aarp_send_ddp(rt->dev, skb, &rt->at, NULL); spin_unlock(&ipddp_route_lock); --- linux-ec2-2.6.31.orig/drivers/net/bonding/bond_main.c +++ linux-ec2-2.6.31/drivers/net/bonding/bond_main.c @@ -691,7 +691,7 @@ struct net_device *slave_dev, int reporting) { const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - static int (*ioctl)(struct net_device *, struct ifreq *, int); + int (*ioctl)(struct net_device *, struct ifreq *, int); struct ifreq ifr; struct mii_ioctl_data *mii; @@ -3707,10 +3707,10 @@ if (skb->protocol == htons(ETH_P_IP)) { return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ - (data->h_dest[5] ^ bond_dev->dev_addr[5])) % count; + (data->h_dest[5] ^ data->h_source[5])) % count; } - return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; + return (data->h_dest[5] ^ data->h_source[5]) % count; } /* @@ -3737,7 +3737,7 @@ } - return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; + return (data->h_dest[5] ^ data->h_source[5]) % count; } /* @@ -3748,7 +3748,7 @@ { struct ethhdr *data = (struct ethhdr *)skb->data; - return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; + return (data->h_dest[5] ^ data->h_source[5]) % count; } /*-------------------------- Device entry points ----------------------------*/ --- linux-ec2-2.6.31.orig/drivers/net/usb/hso.c +++ linux-ec2-2.6.31/drivers/net/usb/hso.c @@ -1362,7 +1362,7 @@ /* reset the rts and dtr */ /* do the actual close */ serial->open_count--; - kref_put(&serial->parent->ref, hso_serial_ref_free); + if (serial->open_count <= 0) { serial->open_count = 0; spin_lock_irq(&serial->serial_lock); @@ -1382,6 +1382,8 @@ usb_autopm_put_interface(serial->parent->interface); mutex_unlock(&serial->parent->mutex); + + kref_put(&serial->parent->ref, hso_serial_ref_free); } /* close the requested serial port */ --- linux-ec2-2.6.31.orig/drivers/net/usb/kaweth.c +++ linux-ec2-2.6.31/drivers/net/usb/kaweth.c @@ -263,6 +263,7 @@ int timeout) { struct usb_ctrlrequest *dr; + int retval; dbg("kaweth_control()"); @@ -278,18 +279,21 @@ return -ENOMEM; } - dr->bRequestType= requesttype; + dr->bRequestType = requesttype; dr->bRequest = request; dr->wValue = 
cpu_to_le16(value); dr->wIndex = cpu_to_le16(index); dr->wLength = cpu_to_le16(size); - return kaweth_internal_control_msg(kaweth->dev, - pipe, - dr, - data, - size, - timeout); + retval = kaweth_internal_control_msg(kaweth->dev, + pipe, + dr, + data, + size, + timeout); + + kfree(dr); + return retval; } /**************************************************************** --- linux-ec2-2.6.31.orig/drivers/net/usb/usbnet.c +++ linux-ec2-2.6.31/drivers/net/usb/usbnet.c @@ -988,7 +988,7 @@ * NOTE: strictly conforming cdc-ether devices should expect * the ZLP here, but ignore the one-byte packet. */ - if ((length % dev->maxpacket) == 0) { + if (!(info->flags & FLAG_SEND_ZLP) && (length % dev->maxpacket) == 0) { urb->transfer_buffer_length++; if (skb_tailroom(skb)) { skb->data[skb->len] = 0; --- linux-ec2-2.6.31.orig/drivers/net/usb/Kconfig +++ linux-ec2-2.6.31/drivers/net/usb/Kconfig @@ -174,7 +174,7 @@ * Ericsson Mobile Broadband Module (all variants) * Motorola (DM100 and SB4100) * Broadcom Cable Modem (reference design) - * Toshiba (PCX1100U and F3507g) + * Toshiba (PCX1100U and F3507g/F3607gw) * ... This driver creates an interface named "ethX", where X depends on --- linux-ec2-2.6.31.orig/drivers/net/usb/rtl8150.c +++ linux-ec2-2.6.31/drivers/net/usb/rtl8150.c @@ -324,7 +324,7 @@ dbg("%02X:", netdev->dev_addr[i]); dbg("%02X\n", netdev->dev_addr[i]); /* Set the IDR registers. */ - set_registers(dev, IDR, sizeof(netdev->dev_addr), netdev->dev_addr); + set_registers(dev, IDR, netdev->addr_len, netdev->dev_addr); #ifdef EEPROM_WRITE { u8 cr; --- linux-ec2-2.6.31.orig/drivers/net/usb/smsc95xx.c +++ linux-ec2-2.6.31/drivers/net/usb/smsc95xx.c @@ -1232,7 +1232,7 @@ .rx_fixup = smsc95xx_rx_fixup, .tx_fixup = smsc95xx_tx_fixup, .status = smsc95xx_status, - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_SEND_ZLP, }; static const struct usb_device_id products[] = { --- linux-ec2-2.6.31.orig/drivers/net/usb/cdc_ether.c +++ linux-ec2-2.6.31/drivers/net/usb/cdc_ether.c @@ -544,20 +544,60 @@ USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, }, { - /* Ericsson F3307 */ + /* Ericsson F3607gw ver 2 */ + USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1905, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, +}, { + /* Ericsson F3607gw ver 3 */ USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1906, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, }, { + /* Ericsson F3307 */ + USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x190a, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, +}, { + /* Ericsson F3307 ver 2 */ + USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1909, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, +}, { + /* Ericsson C3607w */ + USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1049, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, +}, { /* Toshiba F3507g */ USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x130b, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, }, { + /* Toshiba F3607gw */ + USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x130c, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, +}, { + /* Toshiba F3607gw ver 2 */ + USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x1311, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, 
USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, +}, { /* Dell F3507g */ USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x8147, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, +}, { + /* Dell F3607gw */ + USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x8183, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, +}, { + /* Dell F3607gw ver 2 */ + USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x8184, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &cdc_info, }, { }, // END }; --- linux-ec2-2.6.31.orig/drivers/net/mlx4/mlx4.h +++ linux-ec2-2.6.31/drivers/net/mlx4/mlx4.h @@ -205,9 +205,7 @@ void __iomem **uar_map; u32 clr_mask; struct mlx4_eq *eq; - u64 icm_virt; - struct page *icm_page; - dma_addr_t icm_dma; + struct mlx4_icm_table table; struct mlx4_icm_table cmpt_table; int have_irq; u8 inta_pin; @@ -373,9 +371,6 @@ struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *init_hca); -int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt); -void mlx4_unmap_eq_icm(struct mlx4_dev *dev); - int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); --- linux-ec2-2.6.31.orig/drivers/net/mlx4/eq.c +++ linux-ec2-2.6.31/drivers/net/mlx4/eq.c @@ -526,48 +526,6 @@ iounmap(priv->clr_base); } -int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - int ret; - - /* - * We assume that mapping one page is enough for the whole EQ - * context table. This is fine with all current HCAs, because - * we only use 32 EQs and each EQ uses 64 bytes of context - * memory, or 1 KB total. 
- */ - priv->eq_table.icm_virt = icm_virt; - priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER); - if (!priv->eq_table.icm_page) - return -ENOMEM; - priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) { - __free_page(priv->eq_table.icm_page); - return -ENOMEM; - } - - ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt); - if (ret) { - pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - __free_page(priv->eq_table.icm_page); - } - - return ret; -} - -void mlx4_unmap_eq_icm(struct mlx4_dev *dev) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - - mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1); - pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - __free_page(priv->eq_table.icm_page); -} - int mlx4_alloc_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); --- linux-ec2-2.6.31.orig/drivers/net/mlx4/main.c +++ linux-ec2-2.6.31/drivers/net/mlx4/main.c @@ -525,7 +525,10 @@ goto err_unmap_aux; } - err = mlx4_map_eq_icm(dev, init_hca->eqc_base); + err = mlx4_init_icm_table(dev, &priv->eq_table.table, + init_hca->eqc_base, dev_cap->eqc_entry_sz, + dev->caps.num_eqs, dev->caps.num_eqs, + 0, 0); if (err) { mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); goto err_unmap_cmpt; @@ -668,7 +671,7 @@ mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); err_unmap_eq: - mlx4_unmap_eq_icm(dev); + mlx4_cleanup_icm_table(dev, &priv->eq_table.table); err_unmap_cmpt: mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); @@ -698,11 +701,11 @@ mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); + mlx4_cleanup_icm_table(dev, &priv->eq_table.table); mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); - mlx4_unmap_eq_icm(dev); mlx4_UNMAP_ICM_AUX(dev); mlx4_free_icm(dev, priv->fw.aux_icm, 0); --- linux-ec2-2.6.31.orig/drivers/net/sfc/falcon.c +++ linux-ec2-2.6.31/drivers/net/sfc/falcon.c @@ -36,14 +36,25 @@ /** * struct falcon_nic_data - Falcon NIC state + * @sram_cfg: SRAM configuration value + * @tx_dc_base: Base address in SRAM of TX queue descriptor caches + * @rx_dc_base: Base address in SRAM of RX queue descriptor caches * @next_buffer_table: First available buffer table id + * @resources: Resource information for driverlink client * @pci_dev2: The secondary PCI device if present * @i2c_data: Operations and state for I2C bit-bashing algorithm * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired */ struct falcon_nic_data { + int sram_cfg; + unsigned tx_dc_base; + unsigned rx_dc_base; +#ifndef CONFIG_SFC_DRIVERLINK unsigned next_buffer_table; +#else + struct efx_dl_falcon_resources resources; +#endif struct pci_dev *pci_dev2; struct i2c_algo_bit_data i2c_data; @@ -69,11 +80,11 @@ */ #define TX_DC_ENTRIES 16 #define TX_DC_ENTRIES_ORDER 0 -#define TX_DC_BASE 0x130000 +#define TX_DC_INTERNAL_BASE 0x130000 #define RX_DC_ENTRIES 64 #define RX_DC_ENTRIES_ORDER 2 -#define RX_DC_BASE 0x100000 +#define RX_DC_INTERNAL_BASE 0x100000 static const unsigned int /* "Large" EEPROM device: Atmel AT25640 or similar @@ -336,8 
+347,13 @@ memset(buffer->addr, 0xff, len); /* Select new buffer ID */ +#ifndef CONFIG_SFC_DRIVERLINK buffer->index = nic_data->next_buffer_table; nic_data->next_buffer_table += buffer->entries; +#else + buffer->index = nic_data->resources.buffer_table_min; + nic_data->resources.buffer_table_min += buffer->entries; +#endif EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x " "(virt %p phys %llx)\n", buffer->index, @@ -458,9 +474,17 @@ int falcon_probe_tx(struct efx_tx_queue *tx_queue) { struct efx_nic *efx = tx_queue->efx; - return falcon_alloc_special_buffer(efx, &tx_queue->txd, - FALCON_TXD_RING_SIZE * - sizeof(efx_qword_t)); + int rc = falcon_alloc_special_buffer(efx, &tx_queue->txd, + FALCON_TXD_RING_SIZE * + sizeof(efx_qword_t)); +#ifdef CONFIG_SFC_DRIVERLINK + if (rc == 0) { + struct falcon_nic_data *nic_data = efx->nic_data; + nic_data->resources.txq_min = max(nic_data->resources.txq_min, + (unsigned)tx_queue->queue + 1); + } +#endif + return rc; } void falcon_init_tx(struct efx_tx_queue *tx_queue) @@ -600,9 +624,17 @@ int falcon_probe_rx(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; - return falcon_alloc_special_buffer(efx, &rx_queue->rxd, - FALCON_RXD_RING_SIZE * - sizeof(efx_qword_t)); + int rc = falcon_alloc_special_buffer(efx, &rx_queue->rxd, + FALCON_RXD_RING_SIZE * + sizeof(efx_qword_t)); +#ifdef CONFIG_SFC_DRIVERLINK + if (rc == 0) { + struct falcon_nic_data *nic_data = efx->nic_data; + nic_data->resources.rxq_min = max(nic_data->resources.rxq_min, + (unsigned)rx_queue->queue + 1); + } +#endif + return rc; } void falcon_init_rx(struct efx_rx_queue *rx_queue) @@ -960,10 +992,12 @@ case TX_DESCQ_FLS_DONE_EV_DECODE: EFX_TRACE(efx, "channel %d TXQ %d flushed\n", channel->channel, ev_sub_data); + EFX_DL_CALLBACK(efx, event, event); break; case RX_DESCQ_FLS_DONE_EV_DECODE: EFX_TRACE(efx, "channel %d RXQ %d flushed\n", channel->channel, ev_sub_data); + EFX_DL_CALLBACK(efx, event, event); break; case EVQ_INIT_DONE_EV_DECODE: EFX_LOG(efx, "channel %d EVQ %d initialised\n", @@ -972,14 +1006,17 @@ case SRM_UPD_DONE_EV_DECODE: EFX_TRACE(efx, "channel %d SRAM update done\n", channel->channel); + EFX_DL_CALLBACK(efx, event, event); break; case WAKE_UP_EV_DECODE: EFX_TRACE(efx, "channel %d RXQ %d wakeup event\n", channel->channel, ev_sub_data); + EFX_DL_CALLBACK(efx, event, event); break; case TIMER_EV_DECODE: EFX_TRACE(efx, "channel %d RX queue %d timer expired\n", channel->channel, ev_sub_data); + EFX_DL_CALLBACK(efx, event, event); break; case RX_RECOVERY_EV_DECODE: EFX_ERR(efx, "channel %d seen DRIVER RX_RESET event. 
" @@ -1004,6 +1041,7 @@ EFX_TRACE(efx, "channel %d unknown driver event code %d " "data %04x\n", channel->channel, ev_sub_code, ev_sub_data); + EFX_DL_CALLBACK(efx, event, event); break; } } @@ -1104,9 +1142,18 @@ { struct efx_nic *efx = channel->efx; unsigned int evq_size; + int rc; evq_size = FALCON_EVQ_SIZE * sizeof(efx_qword_t); - return falcon_alloc_special_buffer(efx, &channel->eventq, evq_size); + rc = falcon_alloc_special_buffer(efx, &channel->eventq, evq_size); +#ifdef CONFIG_SFC_DRIVERLINK + if (rc == 0) { + struct falcon_nic_data *nic_data = efx->nic_data; + nic_data->resources.evq_int_min = max(nic_data->resources.evq_int_min, + (unsigned)channel->channel + 1); + } +#endif + return rc; } void falcon_init_eventq(struct efx_channel *channel) @@ -2602,19 +2649,22 @@ */ static int falcon_reset_sram(struct efx_nic *efx) { + struct falcon_nic_data *nic_data = efx->nic_data; efx_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker; - int count; + int count, onchip, sram_cfg_val; /* Set the SRAM wake/sleep GPIO appropriately. */ + onchip = (nic_data->sram_cfg == SRM_NB_BSZ_ONCHIP_ONLY); falcon_read(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER); EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OEN, 1); - EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OUT, 1); + EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OUT, onchip); falcon_write(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER); /* Initiate SRAM reset */ + sram_cfg_val = onchip ? 0 : nic_data->sram_cfg; EFX_POPULATE_OWORD_2(srm_cfg_reg_ker, SRAM_OOB_BT_INIT_EN, 1, - SRM_NUM_BANKS_AND_BANK_SIZE, 0); + SRM_NUM_BANKS_AND_BANK_SIZE, sram_cfg_val); falcon_write(efx, &srm_cfg_reg_ker, SRM_CFG_REG_KER); /* Wait for SRAM reset to complete */ @@ -2686,8 +2736,10 @@ /* Extract non-volatile configuration */ static int falcon_probe_nvconfig(struct efx_nic *efx) { + struct falcon_nic_data *nic_data = efx->nic_data; struct falcon_nvconfig *nvconfig; int board_rev; + bool onchip_sram; int rc; nvconfig = kmalloc(sizeof(*nvconfig), GFP_KERNEL); @@ -2700,6 +2752,7 @@ efx->phy_type = PHY_TYPE_NONE; efx->mdio.prtad = MDIO_PRTAD_NONE; board_rev = 0; + onchip_sram = true; rc = 0; } else if (rc) { goto fail1; @@ -2710,6 +2763,13 @@ efx->phy_type = v2->port0_phy_type; efx->mdio.prtad = v2->port0_phy_addr; board_rev = le16_to_cpu(v2->board_revision); +#ifdef CONFIG_SFC_DRIVERLINK + onchip_sram = EFX_OWORD_FIELD(nvconfig->nic_stat_reg, + ONCHIP_SRAM); +#else + /* We have no use for external SRAM */ + onchip_sram = true; +#endif if (le16_to_cpu(nvconfig->board_struct_ver) >= 3) { __le32 fl = v3->spi_device_type[EE_SPI_FLASH]; @@ -2734,6 +2794,21 @@ efx_set_board_info(efx, board_rev); + /* Read the SRAM configuration. The register is initialised + * automatically but might may been reset since boot. + */ + if (onchip_sram) { + nic_data->sram_cfg = SRM_NB_BSZ_ONCHIP_ONLY; + } else { + nic_data->sram_cfg = + EFX_OWORD_FIELD(nvconfig->srm_cfg_reg, + SRM_NUM_BANKS_AND_BANK_SIZE); + WARN_ON(nic_data->sram_cfg == SRM_NB_BSZ_RESERVED); + /* Replace invalid setting with the smallest defaults */ + if (nic_data->sram_cfg == SRM_NB_BSZ_DEFAULT) + nic_data->sram_cfg = SRM_NB_BSZ_1BANKS_2M; + } + kfree(nvconfig); return 0; @@ -2744,6 +2819,109 @@ return rc; } +/* Looks at available SRAM resources and silicon revision, and works out + * how many queues we can support, and where things like descriptor caches + * should live. 
*/ +static int falcon_dimension_resources(struct efx_nic *efx) +{ + struct falcon_nic_data *nic_data = efx->nic_data; +#ifdef CONFIG_SFC_DRIVERLINK + unsigned internal_dcs_entries; + struct efx_dl_falcon_resources *res = &nic_data->resources; + + /* Fill out the driverlink resource list */ + res->hdr.type = EFX_DL_FALCON_RESOURCES; + res->biu_lock = &efx->biu_lock; + efx->dl_info = &res->hdr; + + /* NB. The minimum values get increased as this driver initialises + * its resources, so this should prevent any overlap. + */ + switch (falcon_rev(efx)) { + case FALCON_REV_A1: + res->rxq_min = 16; + res->txq_min = 16; + res->evq_int_min = 4; + res->evq_int_lim = 5; + res->evq_timer_min = 5; + res->evq_timer_lim = 4096; + internal_dcs_entries = 8192; + break; + case FALCON_REV_B0: + default: + res->rxq_min = 0; + res->txq_min = 0; + res->evq_int_min = 0; + res->evq_int_lim = 64; + res->evq_timer_min = 64; + res->evq_timer_lim = 4096; + internal_dcs_entries = 4096; + break; + } + + if (nic_data->sram_cfg == SRM_NB_BSZ_ONCHIP_ONLY) { + res->rxq_lim = internal_dcs_entries / RX_DC_ENTRIES; + res->txq_lim = internal_dcs_entries / TX_DC_ENTRIES; + res->buffer_table_lim = 8192; + nic_data->tx_dc_base = TX_DC_INTERNAL_BASE; + nic_data->rx_dc_base = RX_DC_INTERNAL_BASE; + } else { + unsigned sram_bytes, vnic_bytes, max_vnics, n_vnics, dcs; + + /* Determine how much SRAM we have to play with. We have + * to fit buffer table and descriptor caches in. + */ + switch (nic_data->sram_cfg) { + case SRM_NB_BSZ_1BANKS_2M: + default: + sram_bytes = 2 * 1024 * 1024; + break; + case SRM_NB_BSZ_1BANKS_4M: + case SRM_NB_BSZ_2BANKS_4M: + sram_bytes = 4 * 1024 * 1024; + break; + case SRM_NB_BSZ_1BANKS_8M: + case SRM_NB_BSZ_2BANKS_8M: + sram_bytes = 8 * 1024 * 1024; + break; + case SRM_NB_BSZ_2BANKS_16M: + sram_bytes = 16 * 1024 * 1024; + break; + } + /* For each VNIC allow at least 512 buffer table entries + * and descriptor cache for an rxq and txq. Buffer table + * space for evqs and dmaqs is relatively trivial, so not + * considered in this calculation. + */ + vnic_bytes = 512 * 8 + RX_DC_ENTRIES * 8 + TX_DC_ENTRIES * 8; + max_vnics = sram_bytes / vnic_bytes; + for (n_vnics = 1; n_vnics < res->evq_timer_min + max_vnics;) + n_vnics *= 2; + res->rxq_lim = n_vnics; + res->txq_lim = n_vnics; + + dcs = n_vnics * TX_DC_ENTRIES * 8; + nic_data->tx_dc_base = sram_bytes - dcs; + dcs = n_vnics * RX_DC_ENTRIES * 8; + nic_data->rx_dc_base = nic_data->tx_dc_base - dcs; + res->buffer_table_lim = nic_data->rx_dc_base / 8; + } + + if (FALCON_IS_DUAL_FUNC(efx)) + res->flags |= EFX_DL_FALCON_DUAL_FUNC; + + if (EFX_INT_MODE_USE_MSI(efx)) + res->flags |= EFX_DL_FALCON_USE_MSI; +#else + /* We ignore external SRAM */ + EFX_BUG_ON_PARANOID(nic_data->sram_cfg != SRM_NB_BSZ_ONCHIP_ONLY); + nic_data->tx_dc_base = TX_DC_INTERNAL_BASE; + nic_data->rx_dc_base = RX_DC_INTERNAL_BASE; +#endif + + return 0; +} + /* Probe the NIC variant (revision, ASIC vs FPGA, function count, port * count, port speed). Set workaround and feature flags accordingly. 
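 * (For example, rev A1 in PCI-X mode is rejected outright below, and
 * @silicon_rev is set to "falcon/a1" or "falcon/b0", the string that
 * driverlink clients receive in their probe() callback.)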
*/ @@ -2771,9 +2949,11 @@ EFX_ERR(efx, "Falcon rev A1 PCI-X not supported\n"); return -ENODEV; } + efx->silicon_rev = "falcon/a1"; break; case FALCON_REV_B0: + efx->silicon_rev = "falcon/b0"; break; default: @@ -2883,6 +3063,10 @@ if (rc) goto fail5; + rc = falcon_dimension_resources(efx); + if (rc) + goto fail6; + /* Initialise I2C adapter */ efx->i2c_adap.owner = THIS_MODULE; nic_data->i2c_data = falcon_i2c_bit_operations; @@ -2892,10 +3076,14 @@ strlcpy(efx->i2c_adap.name, "SFC4000 GPIO", sizeof(efx->i2c_adap.name)); rc = i2c_bit_add_bus(&efx->i2c_adap); if (rc) - goto fail5; + goto fail6; return 0; + fail6: +#ifdef CONFIG_SFC_DRIVERLINK + efx->dl_info = NULL; +#endif fail5: falcon_remove_spi_devices(efx); falcon_free_buffer(efx, &efx->irq_status); @@ -2917,13 +3105,15 @@ */ int falcon_init_nic(struct efx_nic *efx) { + struct falcon_nic_data *nic_data = efx->nic_data; efx_oword_t temp; unsigned thresh; int rc; - /* Use on-chip SRAM */ + /* Use on-chip SRAM if wanted. */ falcon_read(efx, &temp, NIC_STAT_REG); - EFX_SET_OWORD_FIELD(temp, ONCHIP_SRAM, 1); + EFX_SET_OWORD_FIELD(temp, ONCHIP_SRAM, + nic_data->sram_cfg == SRM_NB_BSZ_ONCHIP_ONLY); falcon_write(efx, &temp, NIC_STAT_REG); /* Set the source of the GMAC clock */ @@ -2942,9 +3132,9 @@ return rc; /* Set positions of descriptor caches in SRAM. */ - EFX_POPULATE_OWORD_1(temp, SRM_TX_DC_BASE_ADR, TX_DC_BASE / 8); + EFX_POPULATE_OWORD_1(temp, SRM_TX_DC_BASE_ADR, nic_data->tx_dc_base / 8); falcon_write(efx, &temp, SRM_TX_DC_CFG_REG_KER); - EFX_POPULATE_OWORD_1(temp, SRM_RX_DC_BASE_ADR, RX_DC_BASE / 8); + EFX_POPULATE_OWORD_1(temp, SRM_RX_DC_BASE_ADR, nic_data->rx_dc_base / 8); falcon_write(efx, &temp, SRM_RX_DC_CFG_REG_KER); /* Set TX descriptor cache size. */ @@ -3083,6 +3273,9 @@ /* Tear down the private nic state */ kfree(efx->nic_data); efx->nic_data = NULL; +#ifdef CONFIG_SFC_DRIVERLINK + efx->dl_info = NULL; +#endif } void falcon_update_nic_stats(struct efx_nic *efx) --- linux-ec2-2.6.31.orig/drivers/net/sfc/rx.c +++ linux-ec2-2.6.31/drivers/net/sfc/rx.c @@ -444,9 +444,24 @@ * the appropriate LRO method */ static void efx_rx_packet_lro(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf) + struct efx_rx_buffer *rx_buf, + bool checksummed) { + struct efx_nic *efx = channel->efx; struct napi_struct *napi = &channel->napi_str; + enum efx_veto veto; + + /* It would be faster if we had access to packets at the + * other side of generic LRO. Unfortunately, there isn't + * an obvious interface to this, so veto packets before LRO */ + veto = EFX_DL_CALLBACK(efx, rx_packet, rx_buf->data, rx_buf->len); + if (unlikely(veto)) { + EFX_DL_LOG(efx, "LRO RX vetoed by driverlink %s driver\n", + efx->dl_cb_dev.rx_packet->driver->name); + /* Free the buffer now */ + efx_free_rx_buffer(efx, rx_buf); + return; + } /* Pass the skb/page into the LRO engine */ if (rx_buf->page) { @@ -466,7 +481,8 @@ skb->len = rx_buf->len; skb->data_len = rx_buf->len; skb->truesize += rx_buf->len; - skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = + checksummed ? 
CHECKSUM_UNNECESSARY : CHECKSUM_NONE; napi_gro_frags(napi); @@ -475,6 +491,7 @@ rx_buf->page = NULL; } else { EFX_BUG_ON_PARANOID(!rx_buf->skb); + EFX_BUG_ON_PARANOID(!checksummed); napi_gro_receive(napi, rx_buf->skb); rx_buf->skb = NULL; @@ -547,6 +564,7 @@ struct efx_rx_buffer *rx_buf, bool checksummed) { struct efx_nic *efx = channel->efx; + enum efx_veto veto; struct sk_buff *skb; /* If we're in loopback test, then pass the packet directly to the @@ -558,6 +576,16 @@ goto done; } + /* Allow callback to veto the packet */ + veto = EFX_DL_CALLBACK(efx, rx_packet, rx_buf->data, rx_buf->len); + if (unlikely(veto)) { + EFX_DL_LOG(efx, "RX vetoed by driverlink %s driver\n", + efx->dl_cb_dev.rx_packet->driver->name); + /* Free the buffer now */ + efx_free_rx_buffer(efx, rx_buf); + goto done; + } + if (rx_buf->skb) { prefetch(skb_shinfo(rx_buf->skb)); @@ -570,7 +598,7 @@ } if (likely(checksummed || rx_buf->page)) { - efx_rx_packet_lro(channel, rx_buf); + efx_rx_packet_lro(channel, rx_buf, checksummed); goto done; } --- linux-ec2-2.6.31.orig/drivers/net/sfc/tx.c +++ linux-ec2-2.6.31/drivers/net/sfc/tx.c @@ -376,6 +376,7 @@ { struct efx_nic *efx = netdev_priv(net_dev); struct efx_tx_queue *tx_queue; + enum efx_veto veto; if (unlikely(efx->port_inhibited)) return NETDEV_TX_BUSY; @@ -385,6 +386,17 @@ else tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM]; + /* See if driverlink wants to veto the packet. */ + veto = EFX_DL_CALLBACK(efx, tx_packet, skb); + if (unlikely(veto)) { + EFX_DL_LOG(efx, "TX queue %d packet vetoed by " + "driverlink %s driver\n", tx_queue->queue, + efx->dl_cb_dev.tx_packet->driver->name); + /* Free the skb; nothing else will do it */ + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + return efx_xmit(efx, tx_queue, skb); } --- linux-ec2-2.6.31.orig/drivers/net/sfc/efx.c +++ linux-ec2-2.6.31/drivers/net/sfc/efx.c @@ -1486,12 +1486,21 @@ efx_stop_all(efx); + /* Ask driverlink client if we can change MTU */ + rc = EFX_DL_CALLBACK(efx, request_mtu, new_mtu); + if (rc) + goto out; + EFX_LOG(efx, "changing MTU to %d\n", new_mtu); efx_fini_channels(efx); net_dev->mtu = new_mtu; efx_init_channels(efx); + /* Notify driverlink client of new MTU */ + EFX_DL_CALLBACK(efx, mtu_changed, new_mtu); + + out: efx_start_all(efx); return rc; } @@ -1671,6 +1680,25 @@ * Device reset and suspend * **************************************************************************/ +#ifdef CONFIG_SFC_DRIVERLINK +/* Serialise access to the driverlink callbacks, by quiescing event processing + * (without flushing the descriptor queues), and acquiring the rtnl_lock */ +void efx_suspend(struct efx_nic *efx) +{ + EFX_LOG(efx, "suspending operations\n"); + + rtnl_lock(); + efx_stop_all(efx); +} + +void efx_resume(struct efx_nic *efx) +{ + EFX_LOG(efx, "resuming operations\n"); + + efx_start_all(efx); + rtnl_unlock(); +} +#endif /* Tears down the entire software state and most of the hardware state * before reset. */ @@ -1751,8 +1779,8 @@ enum reset_type method = efx->reset_pending; int rc = 0; - /* Serialise with kernel interfaces */ rtnl_lock(); + efx_dl_reset_suspend(efx); /* If we're not RUNNING then don't reset. 
Leave the reset_pending * flag set so that efx_pci_probe_main will be retried */ @@ -1798,6 +1826,7 @@ } out_unlock: + efx_dl_reset_resume(efx, 1); rtnl_unlock(); return rc; } @@ -1942,6 +1971,11 @@ efx->mac_op = &efx_dummy_mac_operations; efx->phy_op = &efx_dummy_phy_operations; efx->mdio.dev = net_dev; +#ifdef CONFIG_SFC_DRIVERLINK + INIT_LIST_HEAD(&efx->dl_node); + INIT_LIST_HEAD(&efx->dl_device_list); + efx->dl_cb = efx_default_callbacks; +#endif INIT_WORK(&efx->phy_work, efx_phy_work); INIT_WORK(&efx->mac_work, efx_mac_work); atomic_set(&efx->netif_stop_count, 1); @@ -2045,6 +2079,7 @@ efx = pci_get_drvdata(pci_dev); if (!efx) return; + efx_dl_unregister_nic(efx); /* Mark the NIC as fini, then stop the interface */ rtnl_lock(); @@ -2221,9 +2256,16 @@ if (rc) goto fail5; + /* Register with driverlink layer */ + rc = efx_dl_register_nic(efx); + if (rc) + goto fail6; + EFX_LOG(efx, "initialisation successful\n"); return 0; + fail6: + efx_unregister_netdev(efx); fail5: efx_pci_remove_main(efx); fail4: --- linux-ec2-2.6.31.orig/drivers/net/sfc/driverlink.c +++ linux-ec2-2.6.31/drivers/net/sfc/driverlink.c @@ -0,0 +1,366 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005 Fen Systems Ltd. + * Copyright 2005-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +/* NB: the four <...> header names below were garbled in this copy of the + * patch; this set is a plausible reconstruction. */ +#include <linux/module.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include "net_driver.h" +#include "efx.h" +#include "driverlink.h" + +/* Protects @efx_driver_list and @efx_port_list */ +static DEFINE_MUTEX(efx_driverlink_lock); + +/* List of all registered drivers */ +static LIST_HEAD(efx_driver_list); + +/* List of all registered Efx ports */ +static LIST_HEAD(efx_port_list); + +/** + * struct efx_dl_handle - driverlink handle used internally to track devices + * @efx_dev: driverlink device handle exported to consumers + * @efx: efx_nic backing the driverlink device + * @port_node: per-device list head + * @driver_node: per-driver list head + */ +struct efx_dl_handle { + struct efx_dl_device efx_dev; + struct efx_nic *efx; + struct list_head port_node; + struct list_head driver_node; +}; + +static struct efx_dl_handle *efx_dl_handle(struct efx_dl_device *efx_dev) +{ + return container_of(efx_dev, struct efx_dl_handle, efx_dev); +} + +/* Remove an Efx device, and call the driver's remove() callback if + * present. The caller must hold @efx_driverlink_lock. */ +static void efx_dl_del_device(struct efx_dl_device *efx_dev) +{ + struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); + + EFX_INFO(efx_handle->efx, "%s driverlink client unregistering\n", + efx_dev->driver->name); + + if (efx_dev->driver->remove) + efx_dev->driver->remove(efx_dev); + + list_del(&efx_handle->driver_node); + list_del(&efx_handle->port_node); + + kfree(efx_handle); +} + +/* Attempt to probe the given device with the driver, creating a + * new &struct efx_dl_device.
If the probe routine returns an error, + * then the &struct efx_dl_device is destroyed */ +static void efx_dl_try_add_device(struct efx_nic *efx, + struct efx_dl_driver *driver) +{ + struct efx_dl_handle *efx_handle; + struct efx_dl_device *efx_dev; + int rc; + + efx_handle = kzalloc(sizeof(*efx_handle), GFP_KERNEL); + if (!efx_handle) + goto fail; + efx_dev = &efx_handle->efx_dev; + efx_handle->efx = efx; + efx_dev->driver = driver; + efx_dev->pci_dev = efx->pci_dev; + INIT_LIST_HEAD(&efx_handle->port_node); + INIT_LIST_HEAD(&efx_handle->driver_node); + + rc = driver->probe(efx_dev, efx->net_dev, + efx->dl_info, efx->silicon_rev); + if (rc) + goto fail; + + list_add_tail(&efx_handle->driver_node, &driver->device_list); + list_add_tail(&efx_handle->port_node, &efx->dl_device_list); + + EFX_INFO(efx, "%s driverlink client registered\n", driver->name); + return; + + fail: + EFX_INFO(efx, "%s driverlink client skipped\n", driver->name); + + kfree(efx_handle); +} + +/* Unregister a driver from the driverlink layer, calling the + * driver's remove() callback for every attached device */ +void efx_dl_unregister_driver(struct efx_dl_driver *driver) +{ + struct efx_dl_handle *efx_handle, *efx_handle_n; + + printk(KERN_INFO "Efx driverlink unregistering %s driver\n", + driver->name); + + mutex_lock(&efx_driverlink_lock); + + list_for_each_entry_safe(efx_handle, efx_handle_n, + &driver->device_list, driver_node) + efx_dl_del_device(&efx_handle->efx_dev); + + list_del(&driver->node); + + mutex_unlock(&efx_driverlink_lock); +} +EXPORT_SYMBOL(efx_dl_unregister_driver); + +/* Register a new driver with the driverlink layer. The driver's + * probe routine will be called for every attached nic. */ +int efx_dl_register_driver(struct efx_dl_driver *driver) +{ + struct efx_nic *efx; + int rc; + + printk(KERN_INFO "Efx driverlink registering %s driver\n", + driver->name); + + INIT_LIST_HEAD(&driver->node); + INIT_LIST_HEAD(&driver->device_list); + + rc = mutex_lock_interruptible(&efx_driverlink_lock); + if (rc) + return rc; + + list_add_tail(&driver->node, &efx_driver_list); + list_for_each_entry(efx, &efx_port_list, dl_node) + efx_dl_try_add_device(efx, driver); + + mutex_unlock(&efx_driverlink_lock); + + return 0; +} +EXPORT_SYMBOL(efx_dl_register_driver); + +void efx_dl_unregister_nic(struct efx_nic *efx) +{ + struct efx_dl_handle *efx_handle, *efx_handle_n; + + mutex_lock(&efx_driverlink_lock); + + list_for_each_entry_safe_reverse(efx_handle, efx_handle_n, + &efx->dl_device_list, + port_node) + efx_dl_del_device(&efx_handle->efx_dev); + + list_del(&efx->dl_node); + + mutex_unlock(&efx_driverlink_lock); +} + +int efx_dl_register_nic(struct efx_nic *efx) +{ + struct efx_dl_driver *driver; + int rc; + + rc = mutex_lock_interruptible(&efx_driverlink_lock); + if (rc) + return rc; + + list_add_tail(&efx->dl_node, &efx_port_list); + list_for_each_entry(driver, &efx_driver_list, node) + efx_dl_try_add_device(efx, driver); + + mutex_unlock(&efx_driverlink_lock); + + return 0; +} + +/* Dummy callback implementations. + * To avoid a branch point on the fast-path, the callbacks are always + * implemented - they are never NULL. 
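+ *
+ * Because each member of @dl_cb always points at either a client hook
+ * or the matching dummy below, a call site such as
+ *
+ *	veto = EFX_DL_CALLBACK(efx, rx_packet, rx_buf->data, rx_buf->len);
+ *
+ * expands (via the macro in driverlink.h) to one unconditional indirect
+ * call, with no NULL test on the fast path.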
+ */ +static enum efx_veto efx_dummy_tx_packet_callback(struct efx_dl_device *efx_dev, + struct sk_buff *skb) +{ + return EFX_ALLOW_PACKET; +} + +static enum efx_veto efx_dummy_rx_packet_callback(struct efx_dl_device *efx_dev, + const char *pkt_buf, int len) +{ + return EFX_ALLOW_PACKET; +} + +static int efx_dummy_request_mtu_callback(struct efx_dl_device *efx_dev, + int new_mtu) +{ + return 0; +} + +static void efx_dummy_mtu_changed_callback(struct efx_dl_device *efx_dev, + int mtu) +{ + return; +} + +static void efx_dummy_event_callback(struct efx_dl_device *efx_dev, void *event) +{ + return; +} + +struct efx_dl_callbacks efx_default_callbacks = { + .tx_packet = efx_dummy_tx_packet_callback, + .rx_packet = efx_dummy_rx_packet_callback, + .request_mtu = efx_dummy_request_mtu_callback, + .mtu_changed = efx_dummy_mtu_changed_callback, + .event = efx_dummy_event_callback, +}; + +void efx_dl_unregister_callbacks(struct efx_dl_device *efx_dev, + struct efx_dl_callbacks *callbacks) +{ + struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); + struct efx_nic *efx = efx_handle->efx; + + efx_suspend(efx); + + EFX_INFO(efx, "removing callback hooks into %s driver\n", + efx_dev->driver->name); + + if (callbacks->tx_packet) { + BUG_ON(efx->dl_cb_dev.tx_packet != efx_dev); + efx->dl_cb.tx_packet = efx_default_callbacks.tx_packet; + efx->dl_cb_dev.tx_packet = NULL; + } + if (callbacks->rx_packet) { + BUG_ON(efx->dl_cb_dev.rx_packet != efx_dev); + efx->dl_cb.rx_packet = efx_default_callbacks.rx_packet; + efx->dl_cb_dev.rx_packet = NULL; + } + if (callbacks->request_mtu) { + BUG_ON(efx->dl_cb_dev.request_mtu != efx_dev); + efx->dl_cb.request_mtu = efx_default_callbacks.request_mtu; + efx->dl_cb_dev.request_mtu = NULL; + } + if (callbacks->mtu_changed) { + BUG_ON(efx->dl_cb_dev.mtu_changed != efx_dev); + efx->dl_cb.mtu_changed = efx_default_callbacks.mtu_changed; + efx->dl_cb_dev.mtu_changed = NULL; + } + if (callbacks->event) { + BUG_ON(efx->dl_cb_dev.event != efx_dev); + efx->dl_cb.event = efx_default_callbacks.event; + efx->dl_cb_dev.event = NULL; + } + + efx_resume(efx); +} +EXPORT_SYMBOL(efx_dl_unregister_callbacks); + +int efx_dl_register_callbacks(struct efx_dl_device *efx_dev, + struct efx_dl_callbacks *callbacks) +{ + struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); + struct efx_nic *efx = efx_handle->efx; + int rc = 0; + + efx_suspend(efx); + + /* Check that the requested callbacks are not already hooked. 
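+	 * Only one client may own each callback; a clash on any member
+	 * fails the whole registration with -EBUSY and hooks nothing.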
*/ + if ((callbacks->tx_packet && efx->dl_cb_dev.tx_packet) || + (callbacks->rx_packet && efx->dl_cb_dev.rx_packet) || + (callbacks->request_mtu && efx->dl_cb_dev.request_mtu) || + (callbacks->mtu_changed && efx->dl_cb_dev.mtu_changed) || + (callbacks->event && efx->dl_cb_dev.event)) { + rc = -EBUSY; + goto out; + } + + EFX_INFO(efx, "adding callback hooks to %s driver\n", + efx_dev->driver->name); + + /* Hook in the requested callbacks, leaving any NULL members + * referencing the members of @efx_default_callbacks */ + if (callbacks->tx_packet) { + efx->dl_cb.tx_packet = callbacks->tx_packet; + efx->dl_cb_dev.tx_packet = efx_dev; + } + if (callbacks->rx_packet) { + efx->dl_cb.rx_packet = callbacks->rx_packet; + efx->dl_cb_dev.rx_packet = efx_dev; + } + if (callbacks->request_mtu) { + efx->dl_cb.request_mtu = callbacks->request_mtu; + efx->dl_cb_dev.request_mtu = efx_dev; + } + if (callbacks->mtu_changed) { + efx->dl_cb.mtu_changed = callbacks->mtu_changed; + efx->dl_cb_dev.mtu_changed = efx_dev; + } + if (callbacks->event) { + efx->dl_cb.event = callbacks->event; + efx->dl_cb_dev.event = efx_dev; + } + + out: + efx_resume(efx); + + return rc; +} +EXPORT_SYMBOL(efx_dl_register_callbacks); + +void efx_dl_schedule_reset(struct efx_dl_device *efx_dev) +{ + struct efx_dl_handle *efx_handle = efx_dl_handle(efx_dev); + struct efx_nic *efx = efx_handle->efx; + + efx_schedule_reset(efx, RESET_TYPE_ALL); +} +EXPORT_SYMBOL(efx_dl_schedule_reset); + +void efx_dl_reset_unlock(void) +{ + mutex_unlock(&efx_driverlink_lock); +} + +/* Suspend ready for reset, serialising against all the driverlink interfaces + * and calling the reset_suspend() callback of every registered driver */ +void efx_dl_reset_suspend(struct efx_nic *efx) +{ + struct efx_dl_handle *efx_handle; + struct efx_dl_device *efx_dev; + + mutex_lock(&efx_driverlink_lock); + + list_for_each_entry_reverse(efx_handle, + &efx->dl_device_list, + port_node) { + efx_dev = &efx_handle->efx_dev; + if (efx_dev->driver->reset_suspend) + efx_dev->driver->reset_suspend(efx_dev); + } +} + +/* Resume after a reset, calling the reset_resume() callback of every registered + * driver, and releasing @efx_driverlink_lock acquired in + * efx_dl_reset_suspend() */ +void efx_dl_reset_resume(struct efx_nic *efx, int ok) +{ + struct efx_dl_handle *efx_handle; + struct efx_dl_device *efx_dev; + + list_for_each_entry(efx_handle, &efx->dl_device_list, + port_node) { + efx_dev = &efx_handle->efx_dev; + if (efx_dev->driver->reset_resume) + efx_dev->driver->reset_resume(efx_dev, ok); + } + + mutex_unlock(&efx_driverlink_lock); +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/driverlink_api.h +++ linux-ec2-2.6.31/drivers/net/sfc/driverlink_api.h @@ -0,0 +1,303 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference.
+ */ + +#ifndef EFX_DRIVERLINK_API_H +#define EFX_DRIVERLINK_API_H + +#include <linux/list.h>	/* reconstructed; original header name garbled */ + +/* Forward declarations */ +struct pci_dev; +struct net_device; +struct sk_buff; +struct efx_dl_device; +struct efx_dl_device_info; + +/* An extra safeguard in addition to symbol versioning */ +#define EFX_DRIVERLINK_API_VERSION 2 + +/** + * struct efx_dl_driver - An Efx driverlink device driver + * + * A driverlink client defines and initializes as many instances of + * efx_dl_driver as required, registering each one with + * efx_dl_register_driver(). + * + * @name: Name of the driver + * @probe: Called when device added + * The client should use the @dev_info linked list and @silicon_rev + * to determine whether to attach to this device. + * Context: process, driverlink semaphore held + * @remove: Called when device removed + * The client must ensure it has finished all operations with this + * device before returning from this method. + * Context: process, driverlink semaphore held + * @reset_suspend: Called before device is reset + * Called immediately before a hardware reset. The client must stop all + * hardware processing before returning from this method. Callbacks will + * be inactive when this method is called. + * Context: process, driverlink semaphore held. rtnl_lock may be held + * @reset_resume: Called after device is reset + * Called after a hardware reset. If @ok is true, the client should + * restore its state and resume normal operations. If @ok is false, the client should + * abandon use of the hardware resources. remove() will still be called. + * Context: process, driverlink semaphore held. rtnl_lock may be held + */ +struct efx_dl_driver { + const char *name; + + int (*probe) (struct efx_dl_device *efx_dl_dev, + const struct net_device *net_dev, + const struct efx_dl_device_info *dev_info, + const char *silicon_rev); + void (*remove) (struct efx_dl_device *efx_dev); + void (*reset_suspend) (struct efx_dl_device *efx_dev); + void (*reset_resume) (struct efx_dl_device *efx_dev, int ok); + +/* private: */ + struct list_head node; + struct list_head device_list; +}; + +/** + * enum efx_dl_device_info_type - Device information identifier. + * + * Used to identify each item in the &struct efx_dl_device_info linked list + * provided to each driverlink client in the probe() @dev_info member. + * + * @EFX_DL_FALCON_RESOURCES: Information type is &struct efx_dl_falcon_resources + */ +enum efx_dl_device_info_type { + /** Falcon resources available for export */ + EFX_DL_FALCON_RESOURCES = 0, +}; + +/** + * struct efx_dl_device_info - device information structure + * + * @next: Link to next structure, if any + * @type: Type code for this structure + */ +struct efx_dl_device_info { + struct efx_dl_device_info *next; + enum efx_dl_device_info_type type; +}; + +/** + * enum efx_dl_falcon_resource_flags - Falcon resource information flags. + * + * Flags that describe hardware variations for the current Falcon device. + * + * @EFX_DL_FALCON_DUAL_FUNC: Port is dual-function. + * Certain silicon revisions have two PCI functions, and require + * certain hardware resources to be accessed via the secondary + * function + * @EFX_DL_FALCON_USE_MSI: Port is initialised to use MSI/MSI-X interrupts. + * Falcon supports traditional legacy interrupts and MSI/MSI-X + * interrupts.
The choice is made at run time by the sfc driver, and + * notified to the clients by this enumeration + */ +enum efx_dl_falcon_resource_flags { + EFX_DL_FALCON_DUAL_FUNC = 0x1, + EFX_DL_FALCON_USE_MSI = 0x2, +}; + +/** + * struct efx_dl_falcon_resources - Falcon resource information. + * + * This structure describes Falcon hardware resources available for + * use by a driverlink driver. + * + * @hdr: Resource linked list header + * @biu_lock: Register access lock. + * Some Falcon revisions require register access for configuration + * registers to be serialised between ports and PCI functions. + * The sfc driver will provide the appropriate lock semantics for + * the underlying hardware. + * @buffer_table_min: First available buffer table entry + * @buffer_table_lim: Last available buffer table entry + 1 + * @evq_timer_min: First available event queue with timer + * @evq_timer_lim: Last available event queue with timer + 1 + * @evq_int_min: First available event queue with interrupt + * @evq_int_lim: Last available event queue with interrupt + 1 + * @rxq_min: First available RX queue + * @rxq_lim: Last available RX queue + 1 + * @txq_min: First available TX queue + * @txq_lim: Last available TX queue + 1 + * @flags: Hardware variation flags + */ +struct efx_dl_falcon_resources { + struct efx_dl_device_info hdr; + spinlock_t *biu_lock; + unsigned buffer_table_min; + unsigned buffer_table_lim; + unsigned evq_timer_min; + unsigned evq_timer_lim; + unsigned evq_int_min; + unsigned evq_int_lim; + unsigned rxq_min; + unsigned rxq_lim; + unsigned txq_min; + unsigned txq_lim; + enum efx_dl_falcon_resource_flags flags; +}; + +/** + * struct efx_dl_device - An Efx driverlink device. + * + * @pci_dev: PCI device used by the sfc driver. + * @priv: Driver private data + * Driverlink clients can use this to store a pointer to their + * internal per-device data structure. Each (driver, device) + * tuple has a separate &struct efx_dl_device, so clients can use + * this @priv field independently. + * @driver: Efx driverlink driver for this device + */ +struct efx_dl_device { + struct pci_dev *pci_dev; + void *priv; + struct efx_dl_driver *driver; +}; + +/** + * enum efx_veto - Packet veto request flag. + * + * This is the return type for the rx_packet() and tx_packet() methods + * in &struct efx_dl_callbacks. + * + * @EFX_ALLOW_PACKET: Packet may be transmitted/received + * @EFX_VETO_PACKET: Packet must not be transmitted/received + */ +enum efx_veto { + EFX_ALLOW_PACKET = 0, + EFX_VETO_PACKET = 1, +}; + +/** + * struct efx_dl_callbacks - Efx callbacks + * + * This is a tightly controlled set of simple callbacks that are attached + * to the sfc driver via efx_dl_register_callbacks(). They export just enough + * state to allow clients to make use of the available hardware resources. + * + * For efficiency, only one client can hook each callback. Since these + * callbacks are called on packet transmit and reception paths, and the + * sfc driver may have multiple tx and rx queues per port, clients should + * avoid acquiring locks or allocating memory. + * + * @tx_packet: Called when packet is about to be transmitted + * Called for every packet about to be transmitted, providing means + * for the client to snoop traffic, and veto transmission by returning + * %EFX_VETO_PACKET (the sfc driver will subsequently free the skb).
+ * Context: tasklet, netif_tx_lock held + * @rx_packet: Called when packet is received + * Called for every received packet (after LRO), allowing the client + * to snoop every received packet (on every rx queue), and veto + * reception by returning %EFX_VETO_PACKET. + * Context: tasklet + * @request_mtu: Called to request MTU change. + * Called whenever the user requests the net_dev mtu to be changed. + * If the client returns an error, the mtu change is aborted. The sfc + * driver guarantees that no other callbacks are running. + * Context: process, rtnl_lock held. + * @mtu_changed: Called when MTU has been changed. + * Called after the mtu has been successfully changed, always after + * a previous call to request_mtu(). The sfc driver guarantees that no + * other callbacks are running. + * Context: process, rtnl_lock held. + * @event: Called when a hardware NIC event is not understood by the sfc driver. + * Context: tasklet. + */ +struct efx_dl_callbacks { + enum efx_veto (*tx_packet) (struct efx_dl_device *efx_dev, + struct sk_buff *skb); + enum efx_veto (*rx_packet) (struct efx_dl_device *efx_dev, + const char *pkt_hdr, int pkt_len); + int (*request_mtu) (struct efx_dl_device *efx_dev, int new_mtu); + void (*mtu_changed) (struct efx_dl_device *efx_dev, int mtu); + void (*event) (struct efx_dl_device *efx_dev, void *p_event); +}; + +/* Include API version number in symbol used for efx_dl_register_driver */ +#define efx_dl_stringify_1(x, y) x ## y +#define efx_dl_stringify_2(x, y) efx_dl_stringify_1(x, y) +#define efx_dl_register_driver \ + efx_dl_stringify_2(efx_dl_register_driver_api_ver_, \ + EFX_DRIVERLINK_API_VERSION) + +/* Exported driverlink api used to register and unregister the client driver + * and any callbacks [only one per port allowed], and to allow a client driver + * to request reset to recover from an error condition. + * + * All of these functions acquire the driverlink semaphore, so must not be + * called from an efx_dl_driver or efx_dl_callbacks member, and must be called + * from process context. + */ +extern int efx_dl_register_driver(struct efx_dl_driver *driver); + +extern void efx_dl_unregister_driver(struct efx_dl_driver *driver); + +extern int efx_dl_register_callbacks(struct efx_dl_device *efx_dev, + struct efx_dl_callbacks *callbacks); + +extern void efx_dl_unregister_callbacks(struct efx_dl_device *efx_dev, + struct efx_dl_callbacks *callbacks); + +/* Schedule a reset without grabbing any locks */ +extern void efx_dl_schedule_reset(struct efx_dl_device *efx_dev); + +/** + * efx_dl_for_each_device_info_matching - iterate an efx_dl_device_info list + * @_dev_info: Pointer to first &struct efx_dl_device_info + * @_type: Type code to look for + * @_info_type: Structure type corresponding to type code + * @_field: Name of &struct efx_dl_device_info field in the type + * @_p: Iterator variable + * + * Example: + * struct efx_dl_falcon_resources *res; + * efx_dl_for_each_device_info_matching(dev_info, EFX_DL_FALCON_RESOURCES, + * struct efx_dl_falcon_resources, + * hdr, res) { + * if (res->flags & EFX_DL_FALCON_DUAL_FUNC) + * .... 
+ * } + */ +#define efx_dl_for_each_device_info_matching(_dev_info, _type, \ + _info_type, _field, _p) \ + for ((_p) = container_of((_dev_info), _info_type, _field); \ + (_p) != NULL; \ + (_p) = container_of((_p)->_field.next, _info_type, _field))\ + if ((_p)->_field.type != _type) \ + continue; \ + else + +/** + * efx_dl_search_device_info - search an efx_dl_device_info list + * @_dev_info: Pointer to first &struct efx_dl_device_info + * @_type: Type code to look for + * @_info_type: Structure type corresponding to type code + * @_field: Name of &struct efx_dl_device_info member in this type + * @_p: Result variable + * + * Example: + * struct efx_dl_falcon_resources *res; + * efx_dl_search_device_info(dev_info, EFX_DL_FALCON_RESOURCES, + * struct efx_dl_falcon_resources, hdr, res); + * if (res) + * .... + */ +#define efx_dl_search_device_info(_dev_info, _type, _info_type, \ + _field, _p) \ + efx_dl_for_each_device_info_matching((_dev_info), (_type), \ + _info_type, _field, (_p)) \ + break; + +#endif /* EFX_DRIVERLINK_API_H */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/driverlink.h +++ linux-ec2-2.6.31/drivers/net/sfc/driverlink.h @@ -0,0 +1,66 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_DRIVERLINK_H +#define EFX_DRIVERLINK_H + +/* Forward declarations */ +struct efx_dl_device; +struct efx_nic; + +#ifdef CONFIG_SFC_DRIVERLINK + +#include "driverlink_api.h" + +/* Efx callback devices + * + * A list of the devices that own each callback. The partner to + * struct efx_dl_callbacks. + */ +struct efx_dl_cb_devices { + struct efx_dl_device *tx_packet; + struct efx_dl_device *rx_packet; + struct efx_dl_device *request_mtu; + struct efx_dl_device *mtu_changed; + struct efx_dl_device *event; +}; + +extern struct efx_dl_callbacks efx_default_callbacks; + +#define EFX_DL_CALLBACK(_port, _name, ...) \ + (_port)->dl_cb._name((_port)->dl_cb_dev._name, __VA_ARGS__) + +extern int efx_dl_register_nic(struct efx_nic *efx); +extern void efx_dl_unregister_nic(struct efx_nic *efx); + +/* Suspend and resume client drivers over a hardware reset */ +extern void efx_dl_reset_suspend(struct efx_nic *efx); +extern void efx_dl_reset_resume(struct efx_nic *efx, int ok); + +#define EFX_DL_LOG EFX_LOG + +#else /* CONFIG_SFC_DRIVERLINK */ + +enum efx_veto { EFX_ALLOW_PACKET = 0 }; + +static inline int efx_nop_callback(struct efx_nic *efx) { return 0; } +#define EFX_DL_CALLBACK(port, name, ...) efx_nop_callback(port) + +static inline int efx_dl_register_nic(struct efx_nic *efx) { return 0; } +static inline void efx_dl_unregister_nic(struct efx_nic *efx) {} + +static inline void efx_dl_reset_suspend(struct efx_nic *efx) {} +static inline void efx_dl_reset_resume(struct efx_nic *efx, int ok) {} + +#define EFX_DL_LOG(efx, fmt, args...) ((void)(efx)) + +#endif /* CONFIG_SFC_DRIVERLINK */ + +#endif /* EFX_DRIVERLINK_H */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/Kconfig +++ linux-ec2-2.6.31/drivers/net/sfc/Kconfig @@ -11,6 +11,17 @@ To compile this driver as a module, choose M here. The module will be called sfc. 
+ +config SFC_DRIVERLINK + bool + +config SFC_RESOURCE + depends on SFC && X86 + select SFC_DRIVERLINK + tristate "Solarflare Solarstorm SFC4000 resource driver" + help + This module provides the SFC resource manager driver. + config SFC_MTD bool "Solarflare Solarstorm SFC4000 flash MTD support" depends on SFC && MTD && !(SFC=y && MTD=m) --- linux-ec2-2.6.31.orig/drivers/net/sfc/net_driver.h +++ linux-ec2-2.6.31/drivers/net/sfc/net_driver.h @@ -29,6 +29,7 @@ #include "enum.h" #include "bitfield.h" +#include "driverlink.h" /************************************************************************** * @@ -754,6 +755,12 @@ * @loopback_mode: Loopback status * @loopback_modes: Supported loopback mode bitmask * @loopback_selftest: Offline self-test private state + * @silicon_rev: Silicon revision description for driverlink + * @dl_info: Linked list of hardware parameters exposed through driverlink + * @dl_node: Driverlink port list + * @dl_device_list: Driverlink device list + * @dl_cb: Driverlink callbacks table + * @dl_cb_dev: Driverlink callback owner devices * * The @priv field of the corresponding &struct net_device points to * this. @@ -844,6 +851,15 @@ unsigned int loopback_modes; void *loopback_selftest; + + const char *silicon_rev; +#ifdef CONFIG_SFC_DRIVERLINK + struct efx_dl_device_info *dl_info; + struct list_head dl_node; + struct list_head dl_device_list; + struct efx_dl_callbacks dl_cb; + struct efx_dl_cb_devices dl_cb_dev; +#endif }; static inline int efx_dev_registered(struct efx_nic *efx) --- linux-ec2-2.6.31.orig/drivers/net/sfc/Makefile +++ linux-ec2-2.6.31/drivers/net/sfc/Makefile @@ -1,6 +1,9 @@ sfc-y += efx.o falcon.o tx.o rx.o falcon_gmac.o \ falcon_xmac.o selftest.o ethtool.o xfp_phy.o \ mdio_10g.o tenxpress.o boards.o sfe4001.o +sfc-$(CONFIG_SFC_DRIVERLINK) += driverlink.o sfc-$(CONFIG_SFC_MTD) += mtd.o obj-$(CONFIG_SFC) += sfc.o + +obj-$(CONFIG_SFC_RESOURCE) += sfc_resource/ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/efrm_internal.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/efrm_internal.h @@ -0,0 +1,41 @@ +#ifndef __EFRM_INTERNAL_H__ +#define __EFRM_INTERNAL_H__ + + +struct filter_resource { + struct efrm_resource rs; + struct vi_resource *pt; + int filter_idx; +}; + +#define filter_resource(rs1) container_of((rs1), struct filter_resource, rs) + + +struct efrm_client { + void *user_data; + struct list_head link; + struct efrm_client_callbacks *callbacks; + struct efhw_nic *nic; + int ref_count; + struct list_head resources; +}; + + +extern void efrm_client_add_resource(struct efrm_client *, + struct efrm_resource *); + +extern int efrm_buffer_table_size(void); + + +static inline void efrm_resource_init(struct efrm_resource *rs, + int type, int instance) +{ + EFRM_ASSERT(instance >= 0); + EFRM_ASSERT(type >= 0 && type < EFRM_RESOURCE_NUM); + rs->rs_ref_count = 1; + rs->rs_handle.handle = (type << 28u) | + (((unsigned)jiffies & 0xfff) << 16) | instance; +} + + +#endif /* __EFRM_INTERNAL_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/falcon.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/falcon.c @@ -0,0 +1,2525 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains Falcon hardware support. 
+ * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* NB: the eight <...> header names below were garbled in this copy of the + * patch; this is a plausible reconstruction of the ci/efhw set. */ +#include <ci/driver/efab/hardware.h> +#include <ci/efhw/debug.h> +#include <ci/efhw/iopage.h> +#include <ci/efhw/falcon.h> +#include <ci/efhw/falcon_hash.h> +#include <ci/efhw/nic.h> +#include <ci/efhw/eventq.h> +#include <ci/efhw/checks.h> + + +/*---------------------------------------------------------------------------- + * + * Workarounds and options + * + *---------------------------------------------------------------------------*/ + +/* Keep a software copy of the filter table and check for duplicates. */ +#define FALCON_FULL_FILTER_CACHE 1 + +/* Read filters back from the hardware to detect corruption. */ +#define FALCON_VERIFY_FILTERS 0 + +/* Options */ +#define RX_FILTER_CTL_SRCH_LIMIT_TCP_FULL 8 /* default search limit */ +#define RX_FILTER_CTL_SRCH_LIMIT_TCP_WILD 8 /* default search limit */ +#define RX_FILTER_CTL_SRCH_LIMIT_UDP_FULL 8 /* default search limit */ +#define RX_FILTER_CTL_SRCH_LIMIT_UDP_WILD 8 /* default search limit */ + +#define FALCON_MAC_SET_TYPE_BY_SPEED 0 + +/* FIXME: We should detect mode at runtime. */ +#define FALCON_BUFFER_TABLE_FULL_MODE 1 + +/* "Fudge factors" - difference between programmed value and actual depth */ +#define RX_FILTER_CTL_SRCH_FUDGE_WILD 3 /* increase the search limit */ +#define RX_FILTER_CTL_SRCH_FUDGE_FULL 1 /* increase the search limit */ +#define TX_FILTER_CTL_SRCH_FUDGE_WILD 3 /* increase the search limit */ +#define TX_FILTER_CTL_SRCH_FUDGE_FULL 1 /* increase the search limit */ + +/*---------------------------------------------------------------------------- + * + * Debug Macros + * + *---------------------------------------------------------------------------*/ + +#define _DEBUG_SYM_ static + + /*---------------------------------------------------------------------------- + * + * Macros and forward declarations + * + *--------------------------------------------------------------------------*/ + +#define FALCON_REGION_NUM 4 /* number of supported memory regions */ + +#define FALCON_BUFFER_TBL_HALF_BYTES 4 +#define FALCON_BUFFER_TBL_FULL_BYTES 8 + +/* Shadow buffer table - hack for testing only */ +#if FALCON_BUFFER_TABLE_FULL_MODE == 0 +# define FALCON_USE_SHADOW_BUFFER_TABLE 1 +#else +# define FALCON_USE_SHADOW_BUFFER_TABLE 0 +#endif + + +/*---------------------------------------------------------------------------- + * + * Header assertion checks + * + *---------------------------------------------------------------------------*/ + +#define FALCON_ASSERT_VALID() /* nothing yet */ + +/* Falcon has a 128bit register model but most registers have useful + defaults or only implement a small number of bits.
+
+   Some registers can be programmed 32 bits UNLOCKED; all others should
+   be interlocked against other threads within the same protection
+   domain.
+
+   The aim is for software to perform the minimum number of writes and
+   also to minimise the read-modify-write activity (which generally
+   indicates a lack of clarity in the use model).
+
+   Registers which are programmed in this module are listed below
+   together with the method of access. Care must be taken to ensure
+   these remain adequate if the register spec changes.
+
+   All 128 bits programmed
+    FALCON_BUFFER_TBL_HALF
+    RX_FILTER_TBL
+    TX_DESC_PTR_TBL
+    RX_DESC_PTR_TBL
+    DRV_EV_REG
+
+   All 64 bits programmed
+    FALCON_BUFFER_TBL_FULL
+
+   32 bits are programmed (UNLOCKED)
+    EVQ_RPTR_REG
+
+   Low 64 bits programmed; the remainder is written with a random number
+    RX_DC_CFG_REG
+    TX_DC_CFG_REG
+    SRM_RX_DC_CFG_REG
+    SRM_TX_DC_CFG_REG
+    BUF_TBL_CFG_REG
+    BUF_TBL_UPD_REG
+    SRM_UPD_EVQ_REG
+    EVQ_PTR_TBL
+    TIMER_CMD_REG
+    TX_PACE_TBL
+    FATAL_INTR_REG
+    INT_EN_REG (When enabling interrupts)
+    TX_FLUSH_DESCQ_REG
+    RX_FLUSH_DESCQ_REG
+
+   Read-modify-write on low 32 bits; the remainder is written with a
+   random number
+    INT_EN_REG (When sending a driver interrupt)
+    DRIVER_REGX
+
+   Read-modify-write on low 64 bits; the remainder is written with a
+   random number
+    SRM_CFG_REG_OFST
+    RX_CFG_REG_OFST
+    RX_FILTER_CTL_REG
+
+   Read-modify-write on full 128 bits
+    TXDP_RESERVED_REG (aka TXDP_UNDOCUMENTED)
+    TX_CFG_REG
+
+*/
+
+
+/*----------------------------------------------------------------------------
+ *
+ * DMAQ low-level register interface
+ *
+ *---------------------------------------------------------------------------*/
+
+static unsigned dmaq_sizes[] = {
+	512,
+	EFHW_1K,
+	EFHW_2K,
+	EFHW_4K,
+};
+
+#define N_DMAQ_SIZES  (sizeof(dmaq_sizes) / sizeof(dmaq_sizes[0]))
+
+static inline ulong falcon_dma_tx_q_offset(struct efhw_nic *nic, unsigned dmaq)
+{
+	EFHW_ASSERT(dmaq < nic->num_dmaqs);
+	return TX_DESC_PTR_TBL_OFST + dmaq * FALCON_REGISTER128;
+}
+
+static inline uint falcon_dma_tx_q_size_index(uint dmaq_size)
+{
+	uint i;
+
+	/* size must be one of the various options, otherwise we assert */
+	for (i = 0; i < N_DMAQ_SIZES; i++) {
+		if (dmaq_size == dmaq_sizes[i])
+			break;
+	}
+	EFHW_ASSERT(i < N_DMAQ_SIZES);
+	return i;
+}
+
+static void
+falcon_dmaq_tx_q_init(struct efhw_nic *nic,
+		      uint dmaq, uint evq_id, uint own_id,
+		      uint tag, uint dmaq_size, uint buf_idx, uint flags)
+{
+	FALCON_LOCK_DECL;
+	uint index, desc_type;
+	uint64_t val1, val2, val3;
+	ulong offset;
+	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
+
+	/* Q attributes */
+	int iscsi_hdig_en = ((flags & EFHW_VI_ISCSI_TX_HDIG_EN) != 0);
+	int iscsi_ddig_en = ((flags & EFHW_VI_ISCSI_TX_DDIG_EN) != 0);
+	int csum_ip_dis = ((flags & EFHW_VI_TX_IP_CSUM_DIS) != 0);
+	int csum_tcp_dis = ((flags & EFHW_VI_TX_TCPUDP_CSUM_DIS) != 0);
+	int non_ip_drop_dis = ((flags & EFHW_VI_TX_TCPUDP_ONLY) == 0);
+
+	/* initialise the TX descriptor queue pointer table */
+
+	/* NB physical vs buffer addressing is determined by the Queue ID. */
+
+	offset = falcon_dma_tx_q_offset(nic, dmaq);
+	index = falcon_dma_tx_q_size_index(dmaq_size);
+
+	/* allow VI flag to override this queue's descriptor type */
+	desc_type = (flags & EFHW_VI_TX_PHYS_ADDR_EN) ? 0 : 1;
+
+	/* bug9403: It is dangerous to allow buffer-addressed queues to
+	 * have owner_id=0.
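+	 * The assertion below enforces this: buffer-addressed queues
+	 * (desc_type == 1) must carry a non-zero owner; only physically
+	 * addressed queues may use owner 0.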
*/ + EFHW_ASSERT((own_id > 0) || desc_type == 0); + + /* dword 1 */ + __DWCHCK(TX_DESCQ_FLUSH_LBN, TX_DESCQ_FLUSH_WIDTH); + __DWCHCK(TX_DESCQ_TYPE_LBN, TX_DESCQ_TYPE_WIDTH); + __DWCHCK(TX_DESCQ_SIZE_LBN, TX_DESCQ_SIZE_WIDTH); + __DWCHCK(TX_DESCQ_LABEL_LBN, TX_DESCQ_LABEL_WIDTH); + __DWCHCK(TX_DESCQ_OWNER_ID_LBN, TX_DESCQ_OWNER_ID_WIDTH); + + __LWCHK(TX_DESCQ_EVQ_ID_LBN, TX_DESCQ_EVQ_ID_WIDTH); + + __RANGECHCK(1, TX_DESCQ_FLUSH_WIDTH); + __RANGECHCK(desc_type, TX_DESCQ_TYPE_WIDTH); + __RANGECHCK(index, TX_DESCQ_SIZE_WIDTH); + __RANGECHCK(tag, TX_DESCQ_LABEL_WIDTH); + __RANGECHCK(own_id, TX_DESCQ_OWNER_ID_WIDTH); + __RANGECHCK(evq_id, TX_DESCQ_EVQ_ID_WIDTH); + + val1 = ((desc_type << TX_DESCQ_TYPE_LBN) | + (index << TX_DESCQ_SIZE_LBN) | + (tag << TX_DESCQ_LABEL_LBN) | + (own_id << TX_DESCQ_OWNER_ID_LBN) | + (__LOW(evq_id, TX_DESCQ_EVQ_ID_LBN, TX_DESCQ_EVQ_ID_WIDTH))); + + /* dword 2 */ + __DW2CHCK(TX_DESCQ_BUF_BASE_ID_LBN, TX_DESCQ_BUF_BASE_ID_WIDTH); + __RANGECHCK(buf_idx, TX_DESCQ_BUF_BASE_ID_WIDTH); + + val2 = ((__HIGH(evq_id, TX_DESCQ_EVQ_ID_LBN, TX_DESCQ_EVQ_ID_WIDTH)) | + (buf_idx << __DW2(TX_DESCQ_BUF_BASE_ID_LBN))); + + /* dword 3 */ + __DW3CHCK(TX_ISCSI_HDIG_EN_LBN, TX_ISCSI_HDIG_EN_WIDTH); + __DW3CHCK(TX_ISCSI_DDIG_EN_LBN, TX_ISCSI_DDIG_EN_WIDTH); + __RANGECHCK(iscsi_hdig_en, TX_ISCSI_HDIG_EN_WIDTH); + __RANGECHCK(iscsi_ddig_en, TX_ISCSI_DDIG_EN_WIDTH); + + val3 = ((iscsi_hdig_en << __DW3(TX_ISCSI_HDIG_EN_LBN)) | + (iscsi_ddig_en << __DW3(TX_ISCSI_DDIG_EN_LBN)) | + (1 << __DW3(TX_DESCQ_EN_LBN))); /* queue enable bit */ + + switch (nic->devtype.variant) { + case 'B': + __DW3CHCK(TX_NON_IP_DROP_DIS_B0_LBN, + TX_NON_IP_DROP_DIS_B0_WIDTH); + __DW3CHCK(TX_IP_CHKSM_DIS_B0_LBN, TX_IP_CHKSM_DIS_B0_WIDTH); + __DW3CHCK(TX_TCP_CHKSM_DIS_B0_LBN, TX_TCP_CHKSM_DIS_B0_WIDTH); + + val3 |= ((non_ip_drop_dis << __DW3(TX_NON_IP_DROP_DIS_B0_LBN))| + (csum_ip_dis << __DW3(TX_IP_CHKSM_DIS_B0_LBN)) | + (csum_tcp_dis << __DW3(TX_TCP_CHKSM_DIS_B0_LBN))); + break; + case 'A': + if (csum_ip_dis || csum_tcp_dis || !non_ip_drop_dis) + EFHW_WARN + ("%s: bad settings for A1 csum_ip_dis=%d " + "csum_tcp_dis=%d non_ip_drop_dis=%d", + __func__, csum_ip_dis, + csum_tcp_dis, non_ip_drop_dis); + break; + default: + EFHW_ASSERT(0); + break; + } + + EFHW_TRACE("%s: txq %x evq %u tag %x id %x buf %x " + "%x:%x:%x->%" PRIx64 ":%" PRIx64 ":%" PRIx64, + __func__, + dmaq, evq_id, tag, own_id, buf_idx, dmaq_size, + iscsi_hdig_en, iscsi_ddig_en, val1, val2, val3); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + return; +} + +static inline ulong +falcon_dma_rx_q_offset(struct efhw_nic *nic, unsigned dmaq) +{ + EFHW_ASSERT(dmaq < nic->num_dmaqs); + return RX_DESC_PTR_TBL_OFST + dmaq * FALCON_REGISTER128; +} + +static void +falcon_dmaq_rx_q_init(struct efhw_nic *nic, + uint dmaq, uint evq_id, uint own_id, + uint tag, uint dmaq_size, uint buf_idx, uint flags) +{ + FALCON_LOCK_DECL; + uint i, desc_type = 1; + uint64_t val1, val2, val3; + ulong offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + /* Q attributes */ +#if BUG5762_WORKAROUND + int jumbo = 1; /* Queues must not have mixed types */ +#else + int jumbo = ((flags & EFHW_VI_JUMBO_EN) != 0); +#endif + int iscsi_hdig_en = ((flags & EFHW_VI_ISCSI_RX_HDIG_EN) != 0); + int iscsi_ddig_en = ((flags & EFHW_VI_ISCSI_RX_DDIG_EN) != 0); + + /* initialise the TX descriptor queue pointer table */ + offset = 
falcon_dma_rx_q_offset(nic, dmaq); + + /* size must be one of the various options, otherwise we assert */ + for (i = 0; i < N_DMAQ_SIZES; i++) { + if (dmaq_size == dmaq_sizes[i]) + break; + } + EFHW_ASSERT(i < N_DMAQ_SIZES); + + /* allow VI flag to override this queue's descriptor type */ + desc_type = (flags & EFHW_VI_RX_PHYS_ADDR_EN) ? 0 : 1; + + /* bug9403: It is dangerous to allow buffer-addressed queues to have + * owner_id=0 */ + EFHW_ASSERT((own_id > 0) || desc_type == 0); + + /* dword 1 */ + __DWCHCK(RX_DESCQ_EN_LBN, RX_DESCQ_EN_WIDTH); + __DWCHCK(RX_DESCQ_JUMBO_LBN, RX_DESCQ_JUMBO_WIDTH); + __DWCHCK(RX_DESCQ_TYPE_LBN, RX_DESCQ_TYPE_WIDTH); + __DWCHCK(RX_DESCQ_SIZE_LBN, RX_DESCQ_SIZE_WIDTH); + __DWCHCK(RX_DESCQ_LABEL_LBN, RX_DESCQ_LABEL_WIDTH); + __DWCHCK(RX_DESCQ_OWNER_ID_LBN, RX_DESCQ_OWNER_ID_WIDTH); + + __LWCHK(RX_DESCQ_EVQ_ID_LBN, RX_DESCQ_EVQ_ID_WIDTH); + + __RANGECHCK(1, RX_DESCQ_EN_WIDTH); + __RANGECHCK(jumbo, RX_DESCQ_JUMBO_WIDTH); + __RANGECHCK(desc_type, RX_DESCQ_TYPE_WIDTH); + __RANGECHCK(i, RX_DESCQ_SIZE_WIDTH); + __RANGECHCK(tag, RX_DESCQ_LABEL_WIDTH); + __RANGECHCK(own_id, RX_DESCQ_OWNER_ID_WIDTH); + __RANGECHCK(evq_id, RX_DESCQ_EVQ_ID_WIDTH); + + val1 = ((1 << RX_DESCQ_EN_LBN) | + (jumbo << RX_DESCQ_JUMBO_LBN) | + (desc_type << RX_DESCQ_TYPE_LBN) | + (i << RX_DESCQ_SIZE_LBN) | + (tag << RX_DESCQ_LABEL_LBN) | + (own_id << RX_DESCQ_OWNER_ID_LBN) | + (__LOW(evq_id, RX_DESCQ_EVQ_ID_LBN, RX_DESCQ_EVQ_ID_WIDTH))); + + /* dword 2 */ + __DW2CHCK(RX_DESCQ_BUF_BASE_ID_LBN, RX_DESCQ_BUF_BASE_ID_WIDTH); + __RANGECHCK(buf_idx, RX_DESCQ_BUF_BASE_ID_WIDTH); + + val2 = ((__HIGH(evq_id, RX_DESCQ_EVQ_ID_LBN, RX_DESCQ_EVQ_ID_WIDTH)) | + (buf_idx << __DW2(RX_DESCQ_BUF_BASE_ID_LBN))); + + /* dword 3 */ + __DW3CHCK(RX_ISCSI_HDIG_EN_LBN, RX_ISCSI_HDIG_EN_WIDTH); + __DW3CHCK(RX_ISCSI_DDIG_EN_LBN, RX_ISCSI_DDIG_EN_WIDTH); + __RANGECHCK(iscsi_hdig_en, RX_ISCSI_HDIG_EN_WIDTH); + __RANGECHCK(iscsi_ddig_en, RX_ISCSI_DDIG_EN_WIDTH); + + val3 = (iscsi_hdig_en << __DW3(RX_ISCSI_HDIG_EN_LBN)) | + (iscsi_ddig_en << __DW3(RX_ISCSI_DDIG_EN_LBN)); + + EFHW_TRACE("%s: rxq %x evq %u tag %x id %x buf %x %s " + "%x:%x:%x -> %" PRIx64 ":%" PRIx64 ":%" PRIx64, + __func__, + dmaq, evq_id, tag, own_id, buf_idx, + jumbo ? 
"jumbo" : "normal", dmaq_size, + iscsi_hdig_en, iscsi_ddig_en, val1, val2, val3); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + return; +} + +static void falcon_dmaq_tx_q_disable(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + uint64_t val1, val2, val3; + ulong offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + /* initialise the TX descriptor queue pointer table */ + + offset = falcon_dma_tx_q_offset(nic, dmaq); + + /* dword 1 */ + __DWCHCK(TX_DESCQ_TYPE_LBN, TX_DESCQ_TYPE_WIDTH); + + val1 = ((uint64_t) 1 << TX_DESCQ_TYPE_LBN); + + /* dword 2 */ + val2 = 0; + + /* dword 3 */ + val3 = (0 << __DW3(TX_DESCQ_EN_LBN)); /* queue enable bit */ + + EFHW_TRACE("%s: %x->%" PRIx64 ":%" PRIx64 ":%" PRIx64, + __func__, dmaq, val1, val2, val3); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + return; +} + +static void falcon_dmaq_rx_q_disable(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + uint64_t val1, val2, val3; + ulong offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + /* initialise the TX descriptor queue pointer table */ + offset = falcon_dma_rx_q_offset(nic, dmaq); + + /* dword 1 */ + __DWCHCK(RX_DESCQ_EN_LBN, RX_DESCQ_EN_WIDTH); + __DWCHCK(RX_DESCQ_TYPE_LBN, RX_DESCQ_TYPE_WIDTH); + + val1 = ((0 << RX_DESCQ_EN_LBN) | (1 << RX_DESCQ_TYPE_LBN)); + + /* dword 2 */ + val2 = 0; + + /* dword 3 */ + val3 = 0; + + EFHW_TRACE("falcon_dmaq_rx_q_disable: %x->%" + PRIx64 ":%" PRIx64 ":%" PRIx64, + dmaq, val1, val2, val3); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + offset, ((val2 << 32) | val1), val3); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + return; +} + + +/*---------------------------------------------------------------------------- + * + * Buffer Table low-level register interface + * + *---------------------------------------------------------------------------*/ + +/*! Convert a (potentially) 64-bit physical address to 32-bits. Every use +** of this function is a place where we're not 64-bit clean. +*/ +static inline uint32_t dma_addr_to_u32(dma_addr_t addr) +{ + /* Top bits had better be zero! 
*/ + EFHW_ASSERT(addr == (addr & 0xffffffff)); + return (uint32_t) addr; +} + +static inline uint32_t +falcon_nic_buffer_table_entry32_mk(dma_addr_t dma_addr, int own_id) +{ + uint32_t dma_addr32 = FALCON_BUFFER_4K_PAGE(dma_addr_to_u32(dma_addr)); + + /* don't do this to me */ + EFHW_BUILD_ASSERT(BUF_ADR_HBUF_ODD_LBN == BUF_ADR_HBUF_EVEN_LBN + 32); + EFHW_BUILD_ASSERT(BUF_OWNER_ID_HBUF_ODD_LBN == + BUF_OWNER_ID_HBUF_EVEN_LBN + 32); + + EFHW_BUILD_ASSERT(BUF_OWNER_ID_HBUF_ODD_WIDTH == + BUF_OWNER_ID_HBUF_EVEN_WIDTH); + EFHW_BUILD_ASSERT(BUF_ADR_HBUF_ODD_WIDTH == BUF_ADR_HBUF_EVEN_WIDTH); + + __DWCHCK(BUF_ADR_HBUF_EVEN_LBN, BUF_ADR_HBUF_EVEN_WIDTH); + __DWCHCK(BUF_OWNER_ID_HBUF_EVEN_LBN, BUF_OWNER_ID_HBUF_EVEN_WIDTH); + + __RANGECHCK(dma_addr32, BUF_ADR_HBUF_EVEN_WIDTH); + __RANGECHCK(own_id, BUF_OWNER_ID_HBUF_EVEN_WIDTH); + + return (dma_addr32 << BUF_ADR_HBUF_EVEN_LBN) | + (own_id << BUF_OWNER_ID_HBUF_EVEN_LBN); +} + +static inline uint64_t +falcon_nic_buffer_table_entry64_mk(dma_addr_t dma_addr, + int bufsz, /* bytes */ + int region, int own_id) +{ + __DW2CHCK(IP_DAT_BUF_SIZE_LBN, IP_DAT_BUF_SIZE_WIDTH); + __DW2CHCK(BUF_ADR_REGION_LBN, BUF_ADR_REGION_WIDTH); + __LWCHK(BUF_ADR_FBUF_LBN, BUF_ADR_FBUF_WIDTH); + __DWCHCK(BUF_OWNER_ID_FBUF_LBN, BUF_OWNER_ID_FBUF_WIDTH); + + EFHW_ASSERT((bufsz == EFHW_4K) || (bufsz == EFHW_8K)); + + dma_addr = (dma_addr >> 12) & __FALCON_MASK64(BUF_ADR_FBUF_WIDTH); + + __RANGECHCK(dma_addr, BUF_ADR_FBUF_WIDTH); + __RANGECHCK(1, IP_DAT_BUF_SIZE_WIDTH); + __RANGECHCK(region, BUF_ADR_REGION_WIDTH); + __RANGECHCK(own_id, BUF_OWNER_ID_FBUF_WIDTH); + + return ((uint64_t) (bufsz == EFHW_8K) << IP_DAT_BUF_SIZE_LBN) | + ((uint64_t) region << BUF_ADR_REGION_LBN) | + ((uint64_t) dma_addr << BUF_ADR_FBUF_LBN) | + ((uint64_t) own_id << BUF_OWNER_ID_FBUF_LBN); +} + +static inline void +_falcon_nic_buffer_table_set32(struct efhw_nic *nic, + dma_addr_t dma_addr, uint bufsz, + uint region, /* not used */ + int own_id, int buffer_id) +{ + /* programming the half table needs to be done in pairs. */ + uint64_t entry, val, shift; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + volatile char __iomem *offset; + + EFHW_BUILD_ASSERT(BUF_ADR_HBUF_ODD_LBN == BUF_ADR_HBUF_EVEN_LBN + 32); + EFHW_BUILD_ASSERT(BUF_OWNER_ID_HBUF_ODD_LBN == + BUF_OWNER_ID_HBUF_EVEN_LBN + 32); + + shift = (buffer_id & 1) ? 32 : 0; + + offset = (efhw_kva + BUF_HALF_TBL_OFST + + ((buffer_id & ~1) * FALCON_BUFFER_TBL_HALF_BYTES)); + + entry = falcon_nic_buffer_table_entry32_mk(dma_addr_to_u32(dma_addr), + own_id); + +#if FALCON_USE_SHADOW_BUFFER_TABLE + val = _falcon_buffer_table[buffer_id & ~1]; +#else + /* This will not work unless we've completed + * the buffer table updates */ + falcon_read_q(offset, &val); +#endif + val &= ~(((uint64_t) 0xffffffff) << shift); + val |= (entry << shift); + + EFHW_TRACE("%s[%x]: %lx:%x:%" PRIx64 "->%x = %" + PRIx64, __func__, buffer_id, (unsigned long) dma_addr, + own_id, entry, (unsigned)(offset - efhw_kva), val); + + /* Falcon requires that access to this register is serialised */ + falcon_write_q(offset, val); + + /* NB. No mmiowb(). Caller should do that e.g by calling commit */ + +#if FALCON_USE_SHADOW_BUFFER_TABLE + _falcon_buffer_table[buffer_id & ~1] = val; +#endif + + /* Confirm the entry if the event queues haven't been set up. 
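+	 * With no irq_handler there is no event queue to deliver the
+	 * SRM update-done event, so read the entry back until the write
+	 * is visible (bounded to roughly 1ms by the loop below).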
+	 */
+	if (!nic->irq_handler) {
+		uint64_t new_val;
+		int count = 0;
+		while (1) {
+			mmiowb();
+			falcon_read_q(offset, &new_val);
+			if (new_val == val)
+				break;
+			count++;
+			if (count > 1000) {
+				EFHW_WARN("%s: poll Timeout", __func__);
+				break;
+			}
+			udelay(1);
+		}
+	}
+}
+
+static inline void
+_falcon_nic_buffer_table_set64(struct efhw_nic *nic,
+			       dma_addr_t dma_addr, uint bufsz,
+			       uint region, int own_id, int buffer_id)
+{
+	volatile char __iomem *offset;
+	uint64_t entry;
+	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
+
+	EFHW_ASSERT(region < FALCON_REGION_NUM);
+
+	EFHW_ASSERT((bufsz == EFHW_4K) ||
+		    (bufsz == EFHW_8K && FALCON_BUFFER_TABLE_FULL_MODE));
+
+	offset = (efhw_kva + BUF_FULL_TBL_OFST +
+		  (buffer_id * FALCON_BUFFER_TBL_FULL_BYTES));
+
+	entry = falcon_nic_buffer_table_entry64_mk(dma_addr, bufsz, region,
+						   own_id);
+
+	EFHW_TRACE("%s[%x]: %lx:bufsz=%x:region=%x:ownid=%x",
+		   __func__, buffer_id, (unsigned long) dma_addr, bufsz,
+		   region, own_id);
+
+	EFHW_TRACE("%s: BUF[%x]:NIC[%x]->%" PRIx64,
+		   __func__, buffer_id,
+		   (unsigned int)(offset - efhw_kva), entry);
+
+	/* Falcon requires that access to this register is serialised */
+	falcon_write_q(offset, entry);
+
+	/* NB. No mmiowb(). Caller should do that e.g. by calling commit */
+
+	/* Confirm the entry if the event queues haven't been set up. */
+	if (!nic->irq_handler) {
+		uint64_t new_entry;
+		int count = 0;
+		while (1) {
+			mmiowb();
+			falcon_read_q(offset, &new_entry);
+			if (new_entry == entry)
+				return;
+			count++;
+			if (count > 1000) {
+				EFHW_WARN("%s: poll Timeout waiting for "
+					  "value %"PRIx64
+					  " (last was %"PRIx64")",
+					  __func__, entry, new_entry);
+				break;
+			}
+			udelay(1);
+		}
+	}
+}
+
+#if FALCON_BUFFER_TABLE_FULL_MODE
+#define _falcon_nic_buffer_table_set _falcon_nic_buffer_table_set64
+#else
+#define _falcon_nic_buffer_table_set _falcon_nic_buffer_table_set32
+#endif
+
+static inline void _falcon_nic_buffer_table_commit(struct efhw_nic *nic)
+{
+	/* MUST be called holding the FALCON_LOCK */
+	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
+	uint64_t cmd;
+
+	EFHW_BUILD_ASSERT(BUF_TBL_UPD_REG_KER_OFST == BUF_TBL_UPD_REG_OFST);
+
+	__DW2CHCK(BUF_UPD_CMD_LBN, BUF_UPD_CMD_WIDTH);
+	__RANGECHCK(1, BUF_UPD_CMD_WIDTH);
+
+	cmd = ((uint64_t) 1 << BUF_UPD_CMD_LBN);
+
+	/* Falcon requires 128 bit atomic access for this register */
+	falcon_write_qq(efhw_kva + BUF_TBL_UPD_REG_OFST,
+			cmd, FALCON_ATOMIC_UPD_REG);
+	mmiowb();
+
+	nic->buf_commit_outstanding++;
+	EFHW_TRACE("COMMIT REQ out=%d", nic->buf_commit_outstanding);
+}
+
+static void falcon_nic_buffer_table_commit(struct efhw_nic *nic)
+{
+	/* nothing to do */
+}
+
+static inline void
+_falcon_nic_buffer_table_clear(struct efhw_nic *nic, int buffer_id, int num)
+{
+	uint64_t cmd;
+	uint64_t start_id = buffer_id;
+	uint64_t end_id = buffer_id + num - 1;
+	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
+
+	volatile char __iomem *offset = (efhw_kva + BUF_TBL_UPD_REG_OFST);
+
+	EFHW_BUILD_ASSERT(BUF_TBL_UPD_REG_KER_OFST == BUF_TBL_UPD_REG_OFST);
+
+#if !FALCON_BUFFER_TABLE_FULL_MODE
+	/* buffer_ids in half buffer mode reference pairs of buffers */
+	EFHW_ASSERT(buffer_id % 2 == 0);
+	EFHW_ASSERT(num % 2 == 0);
+	start_id = start_id >> 1;
+	end_id = end_id >> 1;
+#endif
+
+	EFHW_ASSERT(num >= 1);
+
+	__DWCHCK(BUF_CLR_START_ID_LBN, BUF_CLR_START_ID_WIDTH);
+	__DW2CHCK(BUF_CLR_END_ID_LBN, BUF_CLR_END_ID_WIDTH);
+
+	__DW2CHCK(BUF_CLR_CMD_LBN, BUF_CLR_CMD_WIDTH);
+	__RANGECHCK(1, BUF_CLR_CMD_WIDTH);
+
+	__RANGECHCK(start_id, BUF_CLR_START_ID_WIDTH);
+
__RANGECHCK(end_id, BUF_CLR_END_ID_WIDTH); + + cmd = (((uint64_t) 1 << BUF_CLR_CMD_LBN) | + (start_id << BUF_CLR_START_ID_LBN) | + (end_id << BUF_CLR_END_ID_LBN)); + + /* Falcon requires 128 bit atomic access for this register */ + falcon_write_qq(offset, cmd, FALCON_ATOMIC_UPD_REG); + mmiowb(); + + nic->buf_commit_outstanding++; + EFHW_TRACE("COMMIT CLEAR out=%d", nic->buf_commit_outstanding); +} + +/*---------------------------------------------------------------------------- + * + * Events low-level register interface + * + *---------------------------------------------------------------------------*/ + +static unsigned eventq_sizes[] = { + 512, + EFHW_1K, + EFHW_2K, + EFHW_4K, + EFHW_8K, + EFHW_16K, + EFHW_32K +}; + +#define N_EVENTQ_SIZES (sizeof(eventq_sizes) / sizeof(eventq_sizes[0])) + +static inline void falcon_nic_srm_upd_evq(struct efhw_nic *nic, int evq) +{ + /* set up the eventq which will receive events from the SRAM module. + * i.e buffer table updates and clears, TX and RX aperture table + * updates */ + + FALCON_LOCK_DECL; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + EFHW_BUILD_ASSERT(SRM_UPD_EVQ_REG_OFST == SRM_UPD_EVQ_REG_KER_OFST); + + __DWCHCK(SRM_UPD_EVQ_ID_LBN, SRM_UPD_EVQ_ID_WIDTH); + __RANGECHCK(evq, SRM_UPD_EVQ_ID_WIDTH); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + SRM_UPD_EVQ_REG_OFST, + ((uint64_t) evq << SRM_UPD_EVQ_ID_LBN), + FALCON_ATOMIC_SRPM_UDP_EVQ_REG); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); +} + +static void +falcon_nic_evq_ptr_tbl(struct efhw_nic *nic, + uint evq, /* evq id */ + uint enable, /* 1 to enable, 0 to disable */ + uint buf_base_id,/* Buffer table base for EVQ */ + uint evq_size /* Number of events */) +{ + FALCON_LOCK_DECL; + uint i, val; + ulong offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + /* size must be one of the various options, otherwise we assert */ + for (i = 0; i < N_EVENTQ_SIZES; i++) { + if (evq_size <= eventq_sizes[i]) + break; + } + EFHW_ASSERT(i < N_EVENTQ_SIZES); + + __DWCHCK(EVQ_BUF_BASE_ID_LBN, EVQ_BUF_BASE_ID_WIDTH); + __DWCHCK(EVQ_SIZE_LBN, EVQ_SIZE_WIDTH); + __DWCHCK(EVQ_EN_LBN, EVQ_EN_WIDTH); + + __RANGECHCK(i, EVQ_SIZE_WIDTH); + __RANGECHCK(buf_base_id, EVQ_BUF_BASE_ID_WIDTH); + __RANGECHCK(1, EVQ_EN_WIDTH); + + /* if !enable then only evq needs to be correct, although valid + * values need to be passed in for other arguments to prevent + * assertions */ + + val = ((i << EVQ_SIZE_LBN) | (buf_base_id << EVQ_BUF_BASE_ID_LBN) | + (enable ? 
(1 << EVQ_EN_LBN) : 0)); + + EFHW_ASSERT(evq < nic->num_evqs); + + offset = EVQ_PTR_TBL_CHAR_OFST; + offset += evq * FALCON_REGISTER128; + + EFHW_TRACE("%s: evq %u en=%x:buf=%x:size=%x->%x at %lx", + __func__, evq, enable, buf_base_id, evq_size, val, + offset); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + offset, val, FALCON_ATOMIC_PTR_TBL_REG); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + + /* caller must wait for an update done event before writing any more + table entries */ + + return; +} + +void +falcon_nic_evq_ack(struct efhw_nic *nic, + uint evq, /* evq id */ + uint rptr, /* new read pointer update */ + bool wakeup /* request a wakeup event if ptr's != */ + ) +{ + uint val; + ulong offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + EFHW_BUILD_ASSERT(FALCON_EVQ_CHAR == 4); + + __DWCHCK(EVQ_RPTR_LBN, EVQ_RPTR_WIDTH); + __RANGECHCK(rptr, EVQ_RPTR_WIDTH); + + val = (rptr << EVQ_RPTR_LBN); + + EFHW_ASSERT(evq < nic->num_evqs); + + if (evq < FALCON_EVQ_CHAR) { + offset = EVQ_RPTR_REG_KER_OFST; + offset += evq * FALCON_REGISTER128; + + EFHW_ASSERT(!wakeup); /* don't try this at home */ + } else { + offset = EVQ_RPTR_REG_OFST + (FALCON_EVQ_CHAR * + FALCON_REGISTER128); + offset += (evq - FALCON_EVQ_CHAR) * FALCON_REGISTER128; + + /* nothing to do for interruptless event queues which do + * not want a wakeup */ + if (evq != FALCON_EVQ_CHAR && !wakeup) + return; + } + + EFHW_TRACE("%s: %x %x %x->%x", __func__, evq, rptr, wakeup, val); + + writel(val, efhw_kva + offset); + mmiowb(); +} + +/*---------------------------------------------------------------------------*/ + +static inline void +falcon_drv_ev(struct efhw_nic *nic, uint64_t data, uint qid) +{ + FALCON_LOCK_DECL; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + /* send an event from one driver to the other */ + EFHW_BUILD_ASSERT(DRV_EV_REG_KER_OFST == DRV_EV_REG_OFST); + EFHW_BUILD_ASSERT(DRV_EV_DATA_LBN == 0); + EFHW_BUILD_ASSERT(DRV_EV_DATA_WIDTH == 64); + EFHW_BUILD_ASSERT(DRV_EV_QID_LBN == 64); + EFHW_BUILD_ASSERT(DRV_EV_QID_WIDTH == 12); + + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + DRV_EV_REG_OFST, data, qid); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); +} + +_DEBUG_SYM_ void +falcon_ab_timer_tbl_set(struct efhw_nic *nic, + uint evq, /* timer id */ + uint mode, /* mode bits */ + uint countdown /* counting value to set */) +{ + FALCON_LOCK_DECL; + uint val; + ulong offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + EFHW_BUILD_ASSERT(TIMER_VAL_LBN == 0); + + __DWCHCK(TIMER_MODE_LBN, TIMER_MODE_WIDTH); + __DWCHCK(TIMER_VAL_LBN, TIMER_VAL_WIDTH); + + __RANGECHCK(mode, TIMER_MODE_WIDTH); + __RANGECHCK(countdown, TIMER_VAL_WIDTH); + + val = ((mode << TIMER_MODE_LBN) | (countdown << TIMER_VAL_LBN)); + + if (evq < FALCON_EVQ_CHAR) { + offset = TIMER_CMD_REG_KER_OFST; + offset += evq * EFHW_8K; /* PAGE mapped register */ + } else { + offset = TIMER_TBL_OFST; + offset += evq * FALCON_REGISTER128; + } + EFHW_ASSERT(evq < nic->num_evqs); + + EFHW_TRACE("%s: evq %u mode %x (%s) time %x -> %08x", + __func__, evq, mode, + mode == 0 ? "DISABLE" : + mode == 1 ? "IMMED" : + mode == 2 ? (evq < 5 ? "HOLDOFF" : "RX_TRIG") : + "", countdown, val); + + /* Falcon requires 128 bit atomic access for this register when + * accessed from the driver. 
User access to timers is paged mapped + */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + offset, val, FALCON_ATOMIC_TIMER_CMD_REG); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + return; +} + + +/*-------------------------------------------------------------------- + * + * Rate pacing - Low level interface + * + *--------------------------------------------------------------------*/ +void falcon_nic_pace(struct efhw_nic *nic, uint dmaq, uint pace) +{ + /* Pace specified in 2^(units of microseconds). This is the minimum + additional delay imposed over and above the IPG. + + Pacing only available on the virtual interfaces + */ + FALCON_LOCK_DECL; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + ulong offset; + + if (pace > 20) + pace = 20; /* maxm supported value */ + + __DWCHCK(TX_PACE_LBN, TX_PACE_WIDTH); + __RANGECHCK(pace, TX_PACE_WIDTH); + + switch (nic->devtype.variant) { + case 'A': + EFHW_ASSERT(dmaq >= TX_PACE_TBL_FIRST_QUEUE_A1); + offset = TX_PACE_TBL_A1_OFST; + offset += (dmaq - TX_PACE_TBL_FIRST_QUEUE_A1) * 16; + break; + case 'B': + /* Would be nice to assert this, but as dmaq is unsigned and + * TX_PACE_TBL_FIRST_QUEUE_B0 is 0, it makes no sense + * EFHW_ASSERT(dmaq >= TX_PACE_TBL_FIRST_QUEUE_B0); + */ + offset = TX_PACE_TBL_B0_OFST; + offset += (dmaq - TX_PACE_TBL_FIRST_QUEUE_B0) * 16; + break; + default: + EFHW_ASSERT(0); + offset = 0; + break; + } + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + offset, pace, FALCON_ATOMIC_PACE_REG); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + + EFHW_TRACE("%s: txq %d offset=%lx pace=2^%x", + __func__, dmaq, offset, pace); +} + +/*-------------------------------------------------------------------- + * + * Interrupt - Low level interface + * + *--------------------------------------------------------------------*/ + +static void falcon_nic_handle_fatal_int(struct efhw_nic *nic) +{ + FALCON_LOCK_DECL; + volatile char __iomem *offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + uint64_t val; + + offset = (efhw_kva + FATAL_INTR_REG_OFST); + + /* Falcon requires 32 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + val = readl(offset); + FALCON_LOCK_UNLOCK(nic); + + /* ?? BUG3249 - need to disable illegal address interrupt */ + /* ?? BUG3114 - need to backport interrupt storm protection code */ + EFHW_ERR("fatal interrupt: %s%s%s%s%s%s%s%s%s%s%s%s[%" PRIx64 "]", + val & (1 << PCI_BUSERR_INT_CHAR_LBN) ? "PCI-bus-error " : "", + val & (1 << SRAM_OOB_INT_CHAR_LBN) ? "SRAM-oob " : "", + val & (1 << BUFID_OOB_INT_CHAR_LBN) ? "bufid-oob " : "", + val & (1 << MEM_PERR_INT_CHAR_LBN) ? "int-parity " : "", + val & (1 << RBUF_OWN_INT_CHAR_LBN) ? "rx-bufid-own " : "", + val & (1 << TBUF_OWN_INT_CHAR_LBN) ? "tx-bufid-own " : "", + val & (1 << RDESCQ_OWN_INT_CHAR_LBN) ? "rx-desc-own " : "", + val & (1 << TDESCQ_OWN_INT_CHAR_LBN) ? "tx-desc-own " : "", + val & (1 << EVQ_OWN_INT_CHAR_LBN) ? "evq-own " : "", + val & (1 << EVFF_OFLO_INT_CHAR_LBN) ? "evq-fifo " : "", + val & (1 << ILL_ADR_INT_CHAR_LBN) ? "ill-addr " : "", + val & (1 << SRM_PERR_INT_CHAR_LBN) ? 
"sram-parity " : "", val); +} + +static void falcon_nic_interrupt_hw_enable(struct efhw_nic *nic) +{ + FALCON_LOCK_DECL; + uint val; + volatile char __iomem *offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + EFHW_BUILD_ASSERT(DRV_INT_EN_CHAR_WIDTH == 1); + + if (nic->flags & NIC_FLAG_NO_INTERRUPT) + return; + + offset = (efhw_kva + INT_EN_REG_CHAR_OFST); + val = 1 << DRV_INT_EN_CHAR_LBN; + + EFHW_NOTICE("%s: %x -> %x", __func__, (int)(offset - efhw_kva), + val); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(offset, val, FALCON_ATOMIC_INT_EN_REG); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); +} + +static void falcon_nic_interrupt_hw_disable(struct efhw_nic *nic) +{ + FALCON_LOCK_DECL; + volatile char __iomem *offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + EFHW_BUILD_ASSERT(SRAM_PERR_INT_KER_WIDTH == 1); + EFHW_BUILD_ASSERT(DRV_INT_EN_KER_LBN == 0); + EFHW_BUILD_ASSERT(SRAM_PERR_INT_CHAR_WIDTH == 1); + EFHW_BUILD_ASSERT(DRV_INT_EN_CHAR_LBN == 0); + EFHW_BUILD_ASSERT(SRAM_PERR_INT_KER_LBN == SRAM_PERR_INT_CHAR_LBN); + EFHW_BUILD_ASSERT(DRV_INT_EN_KER_LBN == DRV_INT_EN_CHAR_LBN); + + if (nic->flags & NIC_FLAG_NO_INTERRUPT) + return; + + offset = (efhw_kva + INT_EN_REG_CHAR_OFST); + + EFHW_NOTICE("%s: %x -> 0", __func__, (int)(offset - efhw_kva)); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(offset, 0, FALCON_ATOMIC_INT_EN_REG); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); +} + +static void falcon_nic_irq_addr_set(struct efhw_nic *nic, dma_addr_t dma_addr) +{ + FALCON_LOCK_DECL; + volatile char __iomem *offset; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + + offset = (efhw_kva + INT_ADR_REG_CHAR_OFST); + + EFHW_NOTICE("%s: %x -> " DMA_ADDR_T_FMT, __func__, + (int)(offset - efhw_kva), dma_addr); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(offset, dma_addr, FALCON_ATOMIC_INT_ADR_REG); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); +} + + +/*-------------------------------------------------------------------- + * + * RXDP - low level interface + * + *--------------------------------------------------------------------*/ + +void +falcon_nic_set_rx_usr_buf_size(struct efhw_nic *nic, int usr_buf_bytes) +{ + FALCON_LOCK_DECL; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + uint64_t val, val2, usr_buf_size = usr_buf_bytes / 32; + int rubs_lbn, rubs_width, roec_lbn; + + EFHW_BUILD_ASSERT(RX_CFG_REG_OFST == RX_CFG_REG_KER_OFST); + + switch (nic->devtype.variant) { + default: + EFHW_ASSERT(0); + /* Fall-through to avoid compiler warnings. 
+		 */
+	case 'A':
+		rubs_lbn = RX_USR_BUF_SIZE_A1_LBN;
+		rubs_width = RX_USR_BUF_SIZE_A1_WIDTH;
+		roec_lbn = RX_OWNERR_CTL_A1_LBN;
+		break;
+	case 'B':
+		rubs_lbn = RX_USR_BUF_SIZE_B0_LBN;
+		rubs_width = RX_USR_BUF_SIZE_B0_WIDTH;
+		roec_lbn = RX_OWNERR_CTL_B0_LBN;
+		break;
+	}
+
+	__DWCHCK(rubs_lbn, rubs_width);
+	__QWCHCK(roec_lbn, 1);
+	__RANGECHCK(usr_buf_size, rubs_width);
+
+	/* Falcon requires 128 bit atomic access for this register */
+	FALCON_LOCK_LOCK(nic);
+	falcon_read_qq(efhw_kva + RX_CFG_REG_OFST, &val, &val2);
+
+	val &= ~((__FALCON_MASK64(rubs_width)) << rubs_lbn);
+	val |= (usr_buf_size << rubs_lbn);
+
+	/* shouldn't be needed for a production driver */
+	val |= ((uint64_t) 1 << roec_lbn);
+
+	falcon_write_qq(efhw_kva + RX_CFG_REG_OFST, val, val2);
+	mmiowb();
+	FALCON_LOCK_UNLOCK(nic);
+}
+EXPORT_SYMBOL(falcon_nic_set_rx_usr_buf_size);
+
+
+/*--------------------------------------------------------------------
+ *
+ * TXDP - low level interface
+ *
+ *--------------------------------------------------------------------*/
+
+_DEBUG_SYM_ void falcon_nic_tx_cfg(struct efhw_nic *nic, int unlocked)
+{
+	FALCON_LOCK_DECL;
+	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
+	uint64_t val1, val2;
+
+	EFHW_BUILD_ASSERT(TX_CFG_REG_OFST == TX_CFG_REG_KER_OFST);
+	__DWCHCK(TX_OWNERR_CTL_LBN, TX_OWNERR_CTL_WIDTH);
+	__DWCHCK(TX_NON_IP_DROP_DIS_LBN, TX_NON_IP_DROP_DIS_WIDTH);
+
+	FALCON_LOCK_LOCK(nic);
+	falcon_read_qq(efhw_kva + TX_CFG_REG_OFST, &val1, &val2);
+
+	/* Will flag fatal interrupts on owner id errors. This should not be
+	   on for production code because there is otherwise a denial of
+	   service attack possible */
+	val1 |= (1 << TX_OWNERR_CTL_LBN);
+
+	/* Set up user queue TCP/UDP only packet security */
+	if (unlocked)
+		val1 |= (1 << TX_NON_IP_DROP_DIS_LBN);
+	else
+		val1 &= ~(1 << TX_NON_IP_DROP_DIS_LBN);
+
+	falcon_write_qq(efhw_kva + TX_CFG_REG_OFST, val1, val2);
+	mmiowb();
+	FALCON_LOCK_UNLOCK(nic);
+}
+
+/*--------------------------------------------------------------------
+ *
+ * Random thresholds - Low level interface (Would like these to be op
+ * defaults wherever possible)
+ *
+ *--------------------------------------------------------------------*/
+
+void falcon_nic_pace_cfg(struct efhw_nic *nic, int fb_base, int bin_thresh)
+{
+	FALCON_LOCK_DECL;
+	volatile char __iomem *efhw_kva = EFHW_KVA(nic);
+	unsigned offset = 0;
+	uint64_t val;
+
+	__DWCHCK(TX_PACE_FB_BASE_LBN, TX_PACE_FB_BASE_WIDTH);
+	__DWCHCK(TX_PACE_BIN_TH_LBN, TX_PACE_BIN_TH_WIDTH);
+
+	switch (nic->devtype.variant) {
+	case 'A': offset = TX_PACE_REG_A1_OFST; break;
+	case 'B': offset = TX_PACE_REG_B0_OFST; break;
+	default:  EFHW_ASSERT(0); break;
+	}
+
+	val = (0x15 << TX_PACE_SB_NOTAF_LBN);
+	val |= (0xb << TX_PACE_SB_AF_LBN);
+
+	val |= ((fb_base & __FALCON_MASK64(TX_PACE_FB_BASE_WIDTH)) <<
+		TX_PACE_FB_BASE_LBN);
+	val |= ((bin_thresh & __FALCON_MASK64(TX_PACE_BIN_TH_WIDTH)) <<
+		TX_PACE_BIN_TH_LBN);
+
+	/* Falcon requires 128 bit atomic access for this register */
+	FALCON_LOCK_LOCK(nic);
+	falcon_write_qq(efhw_kva + offset, val, 0);
+	mmiowb();
+	FALCON_LOCK_UNLOCK(nic);
+}
+
+
+/**********************************************************************
+ * Implementation of the HAL.
******************************************** + **********************************************************************/ + +/*---------------------------------------------------------------------------- + * + * Initialisation and configuration discovery + * + *---------------------------------------------------------------------------*/ + +static int falcon_nic_init_irq_channel(struct efhw_nic *nic, int enable) +{ + /* create a buffer for the irq channel */ + int rc; + + if (enable) { + rc = efhw_iopage_alloc(nic, &nic->irq_iobuff); + if (rc < 0) + return rc; + + falcon_nic_irq_addr_set(nic, + efhw_iopage_dma_addr(&nic->irq_iobuff)); + } else { + if (efhw_iopage_is_valid(&nic->irq_iobuff)) + efhw_iopage_free(nic, &nic->irq_iobuff); + + efhw_iopage_mark_invalid(&nic->irq_iobuff); + falcon_nic_irq_addr_set(nic, 0); + } + + EFHW_TRACE("%s: %lx %sable", __func__, + (unsigned long) efhw_iopage_dma_addr(&nic->irq_iobuff), + enable ? "en" : "dis"); + + return 0; +} + +static void falcon_nic_close_hardware(struct efhw_nic *nic) +{ + /* check we are in possession of some hardware */ + if (!efhw_nic_have_hw(nic)) + return; + + falcon_nic_init_irq_channel(nic, 0); + falcon_nic_filter_dtor(nic); + + EFHW_NOTICE("%s:", __func__); +} + +static int +falcon_nic_init_hardware(struct efhw_nic *nic, + struct efhw_ev_handler *ev_handlers, + const uint8_t *mac_addr, int non_irq_evq) +{ + int rc; + + /* header sanity checks */ + FALCON_ASSERT_VALID(); + + /* Initialise supporting modules */ + rc = falcon_nic_filter_ctor(nic); + if (rc < 0) + return rc; + +#if FALCON_USE_SHADOW_BUFFER_TABLE + CI_ZERO_ARRAY(_falcon_buffer_table, FALCON_BUFFER_TBL_NUM); +#endif + + /* Initialise the top level hardware blocks */ + memcpy(nic->mac_addr, mac_addr, ETH_ALEN); + + EFHW_TRACE("%s:", __func__); + + /* nic.c:efhw_nic_init marks all the interrupt units as unused. + + ?? TODO we should be able to request the non-interrupting event + queue and the net driver's (for a net driver that is using libefhw) + additional RSS queues here. + + Result would be that that net driver could call + nic.c:efhw_nic_allocate_common_hardware_resources() and that the + IFDEF FALCON's can be removed from + nic.c:efhw_nic_allocate_common_hardware_resources() + */ + nic->irq_unit = INT_EN_REG_CHAR_OFST; + + /***************************************************************** + * The rest of this function deals with initialization of the NICs + * hardware (as opposed to the initialization of the + * struct efhw_nic data structure */ + + /* char driver grabs SRM events onto the non interrupting + * event queue */ + falcon_nic_srm_upd_evq(nic, non_irq_evq); + + /* RXDP tweaks */ + + /* ?? bug2396 rx_cfg should be ok so long as the net driver + * always pushes buffers big enough for the link MTU */ + + /* set the RX buffer cutoff size to be the same as PAGE_SIZE. + * Use this value when we think that there will be a lot of + * jumbo frames. + * + * The default value 1600 is useful when packets are small, + * but would means that jumbo frame RX queues would need more + * descriptors pushing */ + falcon_nic_set_rx_usr_buf_size(nic, FALCON_RX_USR_BUF_SIZE); + + /* TXDP tweaks */ + /* ?? bug2396 looks ok */ + falcon_nic_tx_cfg(nic, /*unlocked(for non-UDP/TCP)= */ 0); + falcon_nic_pace_cfg(nic, 4, 2); + + /* ?? 
bug2396 + * netdriver must load first or else must RMW this register */ + falcon_nic_rx_filter_ctl_set(nic, RX_FILTER_CTL_SRCH_LIMIT_TCP_FULL, + RX_FILTER_CTL_SRCH_LIMIT_TCP_WILD, + RX_FILTER_CTL_SRCH_LIMIT_UDP_FULL, + RX_FILTER_CTL_SRCH_LIMIT_UDP_WILD); + + if (!(nic->flags & NIC_FLAG_NO_INTERRUPT)) { + rc = efhw_keventq_ctor(nic, FALCON_EVQ_CHAR, + &nic->interrupting_evq, ev_handlers); + if (rc < 0) { + EFHW_ERR("%s: efhw_keventq_ctor() failed (%d) evq=%d", + __func__, rc, FALCON_EVQ_CHAR); + return rc; + } + } + rc = efhw_keventq_ctor(nic, non_irq_evq, + &nic->non_interrupting_evq, NULL); + if (rc < 0) { + EFHW_ERR("%s: efhw_keventq_ctor() failed (%d) evq=%d", + __func__, rc, non_irq_evq); + return rc; + } + + /* allocate IRQ channel */ + rc = falcon_nic_init_irq_channel(nic, 1); + /* ignore failure at user-level for eftest */ + if ((rc < 0) && !(nic->options & NIC_OPT_EFTEST)) + return rc; + + return 0; +} + +/*-------------------------------------------------------------------- + * + * Interrupt + * + *--------------------------------------------------------------------*/ + +static void +falcon_nic_interrupt_enable(struct efhw_nic *nic) +{ + struct efhw_keventq *q; + unsigned rdptr; + + if (nic->flags & NIC_FLAG_NO_INTERRUPT) + return; + + /* Enable driver interrupts */ + EFHW_NOTICE("%s: enable master interrupt", __func__); + falcon_nic_interrupt_hw_enable(nic); + + /* An interrupting eventq must start of day ack its read pointer */ + q = &nic->interrupting_evq; + rdptr = EFHW_EVENT_OFFSET(q, q, 1) / sizeof(efhw_event_t); + falcon_nic_evq_ack(nic, FALCON_EVQ_CHAR, rdptr, false); + EFHW_NOTICE("%s: ACK evq[%d]:%x", __func__, + FALCON_EVQ_CHAR, rdptr); +} + +static void falcon_nic_interrupt_disable(struct efhw_nic *nic) +{ + /* NB. No need to check for NIC_FLAG_NO_INTERRUPT, as + ** falcon_nic_interrupt_hw_disable() will do it. */ + falcon_nic_interrupt_hw_disable(nic); +} + +static void +falcon_nic_set_interrupt_moderation(struct efhw_nic *nic, int evq, + uint32_t val) +{ + if (evq < 0) + evq = FALCON_EVQ_CHAR; + + falcon_ab_timer_tbl_set(nic, evq, TIMER_MODE_INT_HLDOFF, val / 5); +} + +static inline void legacy_irq_ack(struct efhw_nic *nic) +{ + EFHW_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT)); + + if (!(nic->flags & NIC_FLAG_MSI)) { + writel(1, EFHW_KVA(nic) + INT_ACK_REG_CHAR_A1_OFST); + mmiowb(); + /* ?? FIXME: We should be doing a read here to ensure IRQ is + * thoroughly acked before we return from ISR. */ + } +} + +static int falcon_nic_interrupt(struct efhw_nic *nic) +{ + uint32_t *syserr_ptr = + (uint32_t *) efhw_iopage_ptr(&nic->irq_iobuff); + int handled = 0; + int done_ack = 0; + + EFHW_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT)); + EFHW_ASSERT(syserr_ptr); + + /* FIFO fill level interrupt - just log it. */ + if (unlikely(*(syserr_ptr + (DW0_OFST / 4)))) { + EFHW_WARN("%s: *** FIFO *** %x", __func__, + *(syserr_ptr + (DW0_OFST / 4))); + *(syserr_ptr + (DW0_OFST / 4)) = 0; + handled++; + } + + /* Fatal interrupts. */ + if (unlikely(*(syserr_ptr + (DW2_OFST / 4)))) { + *(syserr_ptr + (DW2_OFST / 4)) = 0; + falcon_nic_handle_fatal_int(nic); + handled++; + } + + /* Event queue interrupt. For legacy interrupts we have to check + * that the interrupt is for us, because it could be shared. */ + if (*(syserr_ptr + (DW1_OFST / 4))) { + *(syserr_ptr + (DW1_OFST / 4)) = 0; + /* ACK must come before callback to handler fn. 
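+		 * Acking first means an event that arrives while the
+		 * handler runs can raise a fresh interrupt rather than
+		 * being lost behind a still-asserted line.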
*/ + legacy_irq_ack(nic); + done_ack = 1; + handled++; + if (nic->irq_handler) + nic->irq_handler(nic, 0); + } + + if (unlikely(!done_ack)) { + if (!handled) + /* Shared interrupt line (hopefully). */ + return 0; + legacy_irq_ack(nic); + } + + EFHW_TRACE("%s: handled %d", __func__, handled); + return 1; +} + +/*-------------------------------------------------------------------- + * + * Event Management - and SW event posting + * + *--------------------------------------------------------------------*/ + +static void +falcon_nic_event_queue_enable(struct efhw_nic *nic, uint evq, uint evq_size, + dma_addr_t q_base_addr, /* not used */ + uint buf_base_id, int interrupting) +{ + EFHW_ASSERT(nic); + + /* Whether or not queue has an interrupt depends on + * instance number and h/w variant, so [interrupting] is + * ignored. + */ + falcon_ab_timer_tbl_set(nic, evq, 0/*disable*/, 0); + + falcon_nic_evq_ptr_tbl(nic, evq, 1, buf_base_id, evq_size); + EFHW_TRACE("%s: enable evq %u size %u", __func__, evq, evq_size); +} + +static void +falcon_nic_event_queue_disable(struct efhw_nic *nic, uint evq, int timer_only) +{ + EFHW_ASSERT(nic); + + falcon_ab_timer_tbl_set(nic, evq, 0 /* disable */ , 0); + + if (!timer_only) + falcon_nic_evq_ptr_tbl(nic, evq, 0, 0, 0); + EFHW_TRACE("%s: disenable evq %u", __func__, evq); +} + +static void +falcon_nic_wakeup_request(struct efhw_nic *nic, dma_addr_t q_base_addr, + int next_i, int evq) +{ + EFHW_ASSERT(evq > FALCON_EVQ_CHAR); + falcon_nic_evq_ack(nic, evq, next_i, true); + EFHW_TRACE("%s: evq %d next_i %d", __func__, evq, next_i); +} + +static void falcon_nic_sw_event(struct efhw_nic *nic, int data, int evq) +{ + uint64_t ev_data = data; + + ev_data &= ~FALCON_EVENT_CODE_MASK; + ev_data |= FALCON_EVENT_CODE_SW; + + falcon_drv_ev(nic, ev_data, evq); + EFHW_NOTICE("%s: evq[%d]->%x", __func__, evq, data); +} + + +/*-------------------------------------------------------------------- + * + * Buffer table - helpers + * + *--------------------------------------------------------------------*/ + +#define FALCON_LAZY_COMMIT_HWM (FALCON_BUFFER_UPD_MAX - 16) + +/* Note re.: + * falcon_nic_buffer_table_lazy_commit(struct efhw_nic *nic) + * falcon_nic_buffer_table_update_poll(struct efhw_nic *nic) + * falcon_nic_buffer_table_confirm(struct efhw_nic *nic) + * -- these are no-ops in the user-level driver because it would need to + * coordinate with the real driver on the number of outstanding commits. + * + * An exception is made for eftest apps, which manage the hardware without + * using the char driver. + */ + +static inline void falcon_nic_buffer_table_lazy_commit(struct efhw_nic *nic) +{ + /* Do nothing if operating in synchronous mode. */ + if (!nic->irq_handler) + return; +} + +static inline void falcon_nic_buffer_table_update_poll(struct efhw_nic *nic) +{ + FALCON_LOCK_DECL; + int count = 0, rc = 0; + + /* We can be called here early days */ + if (!nic->irq_handler) + return; + + /* If we need to gather buffer update events then poll the + non-interrupting event queue */ + + /* For each _buffer_table_commit there will be an update done + event. 
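+	   (SRM_UPD_DONE_EV_DECODE, handled in falcon_handle_char_event(),
+	   which decrements buf_commit_outstanding.)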
We don't keep track of how many buffers each commit has + committed, just make sure that all the expected events have been + gathered */ + FALCON_LOCK_LOCK(nic); + + EFHW_TRACE("%s: %d", __func__, nic->buf_commit_outstanding); + + while (nic->buf_commit_outstanding > 0) { + /* we're not expecting to handle any events that require + * upcalls into the core driver */ + struct efhw_ev_handler handler; + memset(&handler, 0, sizeof(handler)); + nic->non_interrupting_evq.ev_handlers = &handler; + rc = efhw_keventq_poll(nic, &nic->non_interrupting_evq); + nic->non_interrupting_evq.ev_handlers = NULL; + + if (rc < 0) { + EFHW_ERR("%s: poll ERROR (%d:%d) ***** ", + __func__, rc, + nic->buf_commit_outstanding); + goto out; + } + + FALCON_LOCK_UNLOCK(nic); + + if (count++) + udelay(1); + + if (count > 1000) { + EFHW_WARN("%s: poll Timeout ***** (%d)", __func__, + nic->buf_commit_outstanding); + nic->buf_commit_outstanding = 0; + return; + } + FALCON_LOCK_LOCK(nic); + } + +out: + FALCON_LOCK_UNLOCK(nic); + return; +} + +void falcon_nic_buffer_table_confirm(struct efhw_nic *nic) +{ + /* confirm buffer table updates - should be used for items where + loss of data would be unacceptable. E.g for the buffers that back + an event or DMA queue */ + FALCON_LOCK_DECL; + + /* Do nothing if operating in synchronous mode. */ + if (!nic->irq_handler) + return; + + FALCON_LOCK_LOCK(nic); + + _falcon_nic_buffer_table_commit(nic); + + FALCON_LOCK_UNLOCK(nic); + + falcon_nic_buffer_table_update_poll(nic); +} + +/*-------------------------------------------------------------------- + * + * Buffer table - API + * + *--------------------------------------------------------------------*/ + +static void +falcon_nic_buffer_table_clear(struct efhw_nic *nic, int buffer_id, int num) +{ + FALCON_LOCK_DECL; + FALCON_LOCK_LOCK(nic); + _falcon_nic_buffer_table_clear(nic, buffer_id, num); + FALCON_LOCK_UNLOCK(nic); +} + +static void +falcon_nic_buffer_table_set(struct efhw_nic *nic, dma_addr_t dma_addr, + uint bufsz, uint region, + int own_id, int buffer_id) +{ + FALCON_LOCK_DECL; + + EFHW_ASSERT(region < FALCON_REGION_NUM); + + EFHW_ASSERT((bufsz == EFHW_4K) || + (bufsz == EFHW_8K && FALCON_BUFFER_TABLE_FULL_MODE)); + + falcon_nic_buffer_table_update_poll(nic); + + FALCON_LOCK_LOCK(nic); + + _falcon_nic_buffer_table_set(nic, dma_addr, bufsz, region, own_id, + buffer_id); + + falcon_nic_buffer_table_lazy_commit(nic); + + FALCON_LOCK_UNLOCK(nic); +} + +void +falcon_nic_buffer_table_set_n(struct efhw_nic *nic, int buffer_id, + dma_addr_t dma_addr, uint bufsz, uint region, + int n_pages, int own_id) +{ + /* used to set up a contiguous range of buffers */ + FALCON_LOCK_DECL; + + EFHW_ASSERT(region < FALCON_REGION_NUM); + + EFHW_ASSERT((bufsz == EFHW_4K) || + (bufsz == EFHW_8K && FALCON_BUFFER_TABLE_FULL_MODE)); + + while (n_pages--) { + + falcon_nic_buffer_table_update_poll(nic); + + FALCON_LOCK_LOCK(nic); + + _falcon_nic_buffer_table_set(nic, dma_addr, bufsz, region, + own_id, buffer_id++); + + falcon_nic_buffer_table_lazy_commit(nic); + + FALCON_LOCK_UNLOCK(nic); + + dma_addr += bufsz; + } +} + +/*-------------------------------------------------------------------- + * + * DMA Queues - mid level API + * + *--------------------------------------------------------------------*/ + +#if BUG5302_WORKAROUND + +/* Tx queues can get stuck if the software write pointer is set to an index + * beyond the configured size of the queue, such that they will not flush. 
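+ * (The fix rewrites the software write pointer, held at byte offset 12
+ * of the queue's page-mapped register block, back to the hardware read
+ * pointer.)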
+ * This code can be run before attempting a flush; it will detect the bogus + * value and reset it. This fixes most instances of this problem, although + * sometimes it does not work, or we may not detect it in the first place, + * if the out-of-range value was replaced by an in-range value earlier. + * (In those cases we have to apply a bigger hammer later, if we see that + * the queue is still not flushing.) + */ +static void +falcon_check_for_bogus_tx_dma_wptr(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + uint64_t val_low64, val_high64; + uint64_t size, hwptr, swptr, val; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + ulong offset = falcon_dma_tx_q_offset(nic, dmaq); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_read_qq(efhw_kva + offset, &val_low64, &val_high64); + FALCON_LOCK_UNLOCK(nic); + + size = (val_low64 >> TX_DESCQ_SIZE_LBN) + & __FALCON_MASK64(TX_DESCQ_SIZE_WIDTH); + size = (1 << size) * 512; + hwptr = (val_high64 >> __DW3(TX_DESCQ_HW_RPTR_LBN)) + & __FALCON_MASK64(TX_DESCQ_HW_RPTR_WIDTH); + swptr = (val_low64 >> TX_DESCQ_SW_WPTR_LBN) + & __FALCON_MASK64(__LW2(TX_DESCQ_SW_WPTR_LBN)); + val = (val_high64) + & + __FALCON_MASK64(__DW3 + (TX_DESCQ_SW_WPTR_LBN + TX_DESCQ_SW_WPTR_WIDTH)); + val = val << __LW2(TX_DESCQ_SW_WPTR_LBN); + swptr = swptr | val; + + if (swptr >= size) { + EFHW_WARN("Resetting bad write pointer for TXQ[%d]", dmaq); + writel((uint32_t) ((hwptr + 0) & (size - 1)), + efhw_kva + falcon_tx_dma_page_addr(dmaq) + 12); + mmiowb(); + } +} + +/* Here's that "bigger hammer": we reset all the pointers (hardware read, + * hardware descriptor cache read, software write) to zero. + */ +void falcon_clobber_tx_dma_ptrs(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + uint64_t val_low64, val_high64; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + ulong offset = falcon_dma_tx_q_offset(nic, dmaq); + + EFHW_WARN("Recovering stuck TXQ[%d]", dmaq); + FALCON_LOCK_LOCK(nic); + falcon_read_qq(efhw_kva + offset, &val_low64, &val_high64); + val_high64 &= ~(__FALCON_MASK64(TX_DESCQ_HW_RPTR_WIDTH) + << __DW3(TX_DESCQ_HW_RPTR_LBN)); + val_high64 &= ~(__FALCON_MASK64(TX_DC_HW_RPTR_WIDTH) + << __DW3(TX_DC_HW_RPTR_LBN)); + falcon_write_qq(efhw_kva + offset, val_low64, val_high64); + mmiowb(); + writel(0, efhw_kva + falcon_tx_dma_page_addr(dmaq) + 12); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); +} + +#endif + +static inline int +__falcon_really_flush_tx_dma_channel(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + uint val; + + EFHW_BUILD_ASSERT(TX_FLUSH_DESCQ_REG_KER_OFST == + TX_FLUSH_DESCQ_REG_OFST); + + __DWCHCK(TX_FLUSH_DESCQ_CMD_LBN, TX_FLUSH_DESCQ_CMD_WIDTH); + __DWCHCK(TX_FLUSH_DESCQ_LBN, TX_FLUSH_DESCQ_WIDTH); + __RANGECHCK(dmaq, TX_FLUSH_DESCQ_WIDTH); + + val = ((1 << TX_FLUSH_DESCQ_CMD_LBN) | (dmaq << TX_FLUSH_DESCQ_LBN)); + + EFHW_TRACE("TX DMA flush[%d]", dmaq); + +#if BUG5302_WORKAROUND + falcon_check_for_bogus_tx_dma_wptr(nic, dmaq); +#endif + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + TX_FLUSH_DESCQ_REG_OFST, + val, FALCON_ATOMIC_TX_FLUSH_DESCQ); + + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + return 0; +} + +static inline int +__falcon_is_tx_dma_channel_flushed(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + uint64_t val_low64, val_high64; + uint64_t enable, flush_pending; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + ulong offset = 
falcon_dma_tx_q_offset(nic, dmaq); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_read_qq(efhw_kva + offset, &val_low64, &val_high64); + FALCON_LOCK_UNLOCK(nic); + + /* should see one of three values for these 2 bits + * 1, queue enabled no flush pending + * - i.e. first flush request + * 2, queue enabled, flush pending + * - i.e. request to reflush before flush finished + * 3, queue disabled (no flush pending) + * - flush complete + */ + __DWCHCK(TX_DESCQ_FLUSH_LBN, TX_DESCQ_FLUSH_WIDTH); + __DW3CHCK(TX_DESCQ_EN_LBN, TX_DESCQ_EN_WIDTH); + enable = val_high64 & (1 << __DW3(TX_DESCQ_EN_LBN)); + flush_pending = val_low64 & (1 << TX_DESCQ_FLUSH_LBN); + + if (enable && !flush_pending) + return 0; + + EFHW_TRACE("%d, %s: %s, %sflush pending", dmaq, __func__, + enable ? "enabled" : "disabled", + flush_pending ? "" : "NO "); + /* still in progress */ + if (enable && flush_pending) + return -EALREADY; + + return -EAGAIN; +} + +static int falcon_flush_tx_dma_channel(struct efhw_nic *nic, uint dmaq) +{ + int rc; + rc = __falcon_is_tx_dma_channel_flushed(nic, dmaq); + if (rc < 0) { + EFHW_WARN("%s: failed %d", __func__, rc); + return rc; + } + return __falcon_really_flush_tx_dma_channel(nic, dmaq); +} + +static int +__falcon_really_flush_rx_dma_channel(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + uint val; + + EFHW_BUILD_ASSERT(RX_FLUSH_DESCQ_REG_KER_OFST == + RX_FLUSH_DESCQ_REG_OFST); + + __DWCHCK(RX_FLUSH_DESCQ_CMD_LBN, RX_FLUSH_DESCQ_CMD_WIDTH); + __DWCHCK(RX_FLUSH_DESCQ_LBN, RX_FLUSH_DESCQ_WIDTH); + __RANGECHCK(dmaq, RX_FLUSH_DESCQ_WIDTH); + + val = ((1 << RX_FLUSH_DESCQ_CMD_LBN) | (dmaq << RX_FLUSH_DESCQ_LBN)); + + EFHW_TRACE("RX DMA flush[%d]", dmaq); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_write_qq(efhw_kva + RX_FLUSH_DESCQ_REG_OFST, val, + FALCON_ATOMIC_RX_FLUSH_DESCQ); + mmiowb(); + FALCON_LOCK_UNLOCK(nic); + return 0; +} + +static inline int +__falcon_is_rx_dma_channel_flushed(struct efhw_nic *nic, uint dmaq) +{ + FALCON_LOCK_DECL; + uint64_t val; + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + ulong offset = falcon_dma_rx_q_offset(nic, dmaq); + + /* Falcon requires 128 bit atomic access for this register */ + FALCON_LOCK_LOCK(nic); + falcon_read_q(efhw_kva + offset, &val); + FALCON_LOCK_UNLOCK(nic); + + __DWCHCK(RX_DESCQ_EN_LBN, RX_DESCQ_EN_WIDTH); + + /* is it enabled? */ + return (val & (1 << RX_DESCQ_EN_LBN)) + ? 
0 : -EAGAIN; +} + +static int falcon_flush_rx_dma_channel(struct efhw_nic *nic, uint dmaq) +{ + int rc; + rc = __falcon_is_rx_dma_channel_flushed(nic, dmaq); + if (rc < 0) { + EFHW_ERR("%s: failed %d", __func__, rc); + return rc; + } + return __falcon_really_flush_rx_dma_channel(nic, dmaq); +} + +/*-------------------------------------------------------------------- + * + * Falcon specific event callbacks + * + *--------------------------------------------------------------------*/ + +int +falcon_handle_char_event(struct efhw_nic *nic, struct efhw_ev_handler *h, + efhw_event_t *ev) +{ + EFHW_TRACE("DRIVER EVENT: "FALCON_EVENT_FMT, + FALCON_EVENT_PRI_ARG(*ev)); + + switch (FALCON_EVENT_DRIVER_SUBCODE(ev)) { + + case TX_DESCQ_FLS_DONE_EV_DECODE: + EFHW_TRACE("TX[%d] flushed", + (int)FALCON_EVENT_TX_FLUSH_Q_ID(ev)); + efhw_handle_txdmaq_flushed(nic, h, ev); + break; + + case RX_DESCQ_FLS_DONE_EV_DECODE: + EFHW_TRACE("RX[%d] flushed", + (int)FALCON_EVENT_TX_FLUSH_Q_ID(ev)); + efhw_handle_rxdmaq_flushed(nic, h, ev); + break; + + case SRM_UPD_DONE_EV_DECODE: + nic->buf_commit_outstanding = + max(0, nic->buf_commit_outstanding - 1); + EFHW_TRACE("COMMIT DONE %d", nic->buf_commit_outstanding); + break; + + case EVQ_INIT_DONE_EV_DECODE: + EFHW_TRACE("%sEVQ INIT", ""); + break; + + case WAKE_UP_EV_DECODE: + EFHW_TRACE("%sWAKE UP", ""); + efhw_handle_wakeup_event(nic, h, ev); + break; + + case TIMER_EV_DECODE: + EFHW_TRACE("%sTIMER", ""); + efhw_handle_timeout_event(nic, h, ev); + break; + + case RX_DESCQ_FLSFF_OVFL_EV_DECODE: + /* This shouldn't happen. */ + EFHW_ERR("%s: RX flush fifo overflowed", __func__); + return -EINVAL; + + default: + EFHW_TRACE("UNKOWN DRIVER EVENT: " FALCON_EVENT_FMT, + FALCON_EVENT_PRI_ARG(*ev)); + break; + } + return 0; +} + + +/*-------------------------------------------------------------------- + * + * Filter search depth control + * + *--------------------------------------------------------------------*/ + + +#define Q0_READ(q0, name) \ + ((unsigned)(((q0) >> name##_LBN) & (__FALCON_MASK64(name##_WIDTH)))) +#define Q0_MASK(name) \ + ((__FALCON_MASK64(name##_WIDTH)) << name##_LBN) +#define Q0_VALUE(name, value) \ + (((uint64_t)(value)) << name##_LBN) + +#define Q1_READ(q1, name) \ + ((unsigned)(((q1) >> (name##_LBN - 64)) & \ + (__FALCON_MASK64(name##_WIDTH)))) +#define Q1_MASK(name) \ + ((__FALCON_MASK64(name##_WIDTH)) << (name##_LBN - 64)) +#define Q1_VALUE(name, value) \ + (((uint64_t)(value)) << (name##_LBN - 64)) + + +void +falcon_nic_get_rx_filter_search_limits(struct efhw_nic *nic, + struct efhw_filter_search_limits *lim, + int use_raw_values) +{ + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + FALCON_LOCK_DECL; + uint64_t q0, q1; + unsigned ff = (use_raw_values ? 0 : RX_FILTER_CTL_SRCH_FUDGE_FULL); + unsigned wf = (use_raw_values ? 0 : RX_FILTER_CTL_SRCH_FUDGE_WILD); + + FALCON_LOCK_LOCK(nic); + falcon_read_qq(efhw_kva + RX_FILTER_CTL_REG_OFST, &q0, &q1); + FALCON_LOCK_UNLOCK(nic); + + lim->tcp_full = Q0_READ(q0, TCP_FULL_SRCH_LIMIT) - ff; + lim->tcp_wild = Q0_READ(q0, TCP_WILD_SRCH_LIMIT) - wf; + lim->udp_full = Q0_READ(q0, UDP_FULL_SRCH_LIMIT) - ff; + lim->udp_wild = Q0_READ(q0, UDP_WILD_SRCH_LIMIT) - wf; +} +EXPORT_SYMBOL(falcon_nic_get_rx_filter_search_limits); + + +void +falcon_nic_set_rx_filter_search_limits(struct efhw_nic *nic, + struct efhw_filter_search_limits *lim, + int use_raw_values) +{ + volatile char __iomem *efhw_kva = EFHW_KVA(nic); + FALCON_LOCK_DECL; + uint64_t q0, q1; + unsigned ff = (use_raw_values ? 
0 : RX_FILTER_CTL_SRCH_FUDGE_FULL);
+	unsigned wf = (use_raw_values ? 0 : RX_FILTER_CTL_SRCH_FUDGE_WILD);
+
+	FALCON_LOCK_LOCK(nic);
+	falcon_read_qq(efhw_kva + RX_FILTER_CTL_REG_OFST, &q0, &q1);
+
+	q0 &= ~Q0_MASK(TCP_FULL_SRCH_LIMIT);
+	q0 &= ~Q0_MASK(TCP_WILD_SRCH_LIMIT);
+	q0 &= ~Q0_MASK(UDP_FULL_SRCH_LIMIT);
+	q0 &= ~Q0_MASK(UDP_WILD_SRCH_LIMIT);
+	q0 |= Q0_VALUE(TCP_FULL_SRCH_LIMIT, lim->tcp_full + ff);
+	q0 |= Q0_VALUE(TCP_WILD_SRCH_LIMIT, lim->tcp_wild + wf);
+	q0 |= Q0_VALUE(UDP_FULL_SRCH_LIMIT, lim->udp_full + ff);
+	q0 |= Q0_VALUE(UDP_WILD_SRCH_LIMIT, lim->udp_wild + wf);
+	nic->tcp_full_srch.max = lim->tcp_full + ff
+		- RX_FILTER_CTL_SRCH_FUDGE_FULL;
+	nic->tcp_wild_srch.max = lim->tcp_wild + wf
+		- RX_FILTER_CTL_SRCH_FUDGE_WILD;
+	nic->udp_full_srch.max = lim->udp_full + ff
+		- RX_FILTER_CTL_SRCH_FUDGE_FULL;
+	nic->udp_wild_srch.max = lim->udp_wild + wf
+		- RX_FILTER_CTL_SRCH_FUDGE_WILD;
+
+	falcon_write_qq(efhw_kva + RX_FILTER_CTL_REG_OFST, q0, q1);
+	mmiowb();
+	FALCON_LOCK_UNLOCK(nic);
+}
+EXPORT_SYMBOL(falcon_nic_set_rx_filter_search_limits);
+
+
+#undef READ_Q0
+#undef Q0_MASK
+#undef Q0_VALUE
+#undef READ_Q1
+#undef Q1_MASK
+#undef Q1_VALUE
+
+
+/*--------------------------------------------------------------------
+ *
+ * New unified filter API
+ *
+ *--------------------------------------------------------------------*/
+
+
+#if FALCON_FULL_FILTER_CACHE
+static inline struct efhw_filter_spec *
+filter_spec_cache_entry(struct efhw_nic *nic, int filter_idx)
+{
+	EFHW_ASSERT(nic->filter_spec_cache);
+	return &nic->filter_spec_cache[filter_idx];
+}
+#endif
+
+
+static int filter_is_active(struct efhw_nic *nic, int filter_idx)
+{
+	return nic->filter_in_use[filter_idx];
+}
+
+
+static void set_filter_cache_entry(struct efhw_nic *nic,
+				   struct efhw_filter_spec *spec,
+				   int filter_idx)
+{
+	nic->filter_in_use[filter_idx] = 1;
+#if FALCON_FULL_FILTER_CACHE
+	memcpy(filter_spec_cache_entry(nic, filter_idx), spec,
+	       sizeof(struct efhw_filter_spec));
+#endif
+}
+
+
+static void clear_filter_cache_entry(struct efhw_nic *nic,
+				     int filter_idx)
+{
+	nic->filter_in_use[filter_idx] = 0;
+#if FALCON_FULL_FILTER_CACHE
+	memset(filter_spec_cache_entry(nic, filter_idx), 0,
+	       sizeof(struct efhw_filter_spec));
+#endif
+}
+
+
+#if FALCON_FULL_FILTER_CACHE
+static int filter_is_duplicate(struct efhw_nic *nic,
+			       struct efhw_filter_spec *spec, int filter_idx)
+{
+	struct efhw_filter_spec *cmp;
+
+	cmp = filter_spec_cache_entry(nic, filter_idx);
+
+	EFHW_ASSERT(filter_is_active(nic, filter_idx));
+
+	return (spec->saddr_le32 == cmp->saddr_le32) &&
+	       (spec->daddr_le32 == cmp->daddr_le32) &&
+	       (spec->sport_le16 == cmp->sport_le16) &&
+	       (spec->dport_le16 == cmp->dport_le16) &&
+	       (spec->tcp == cmp->tcp) &&
+	       (spec->full == cmp->full);
+}
+#endif
+
+
+static void common_build_ip_filter(struct efhw_nic *nic, int tcp, int full,
+				   int rss, int scatter, uint dmaq_id,
+				   unsigned saddr_le32, unsigned sport_le16,
+				   unsigned daddr_le32, unsigned dport_le16,
+				   uint64_t *q0, uint64_t *q1)
+{
+	uint64_t v1, v2, v3, v4;
+	unsigned tmp_port_le16;
+
+	if (!full) {
+		saddr_le32 = 0;
+		sport_le16 = 0;
+		if (!tcp) {
+			tmp_port_le16 = sport_le16;
+			sport_le16 = dport_le16;
+			dport_le16 = tmp_port_le16;
+		}
+	}
+
+	v4 = (((!tcp) << __DW4(TCP_UDP_0_LBN)) |
+	      (dmaq_id << __DW4(RXQ_ID_0_LBN)));
+
+	switch (nic->devtype.variant) {
+	case 'A':
+		EFHW_ASSERT(!rss);
+		break;
+	case 'B':
+		v4 |= scatter << __DW4(SCATTER_EN_0_B0_LBN);
+		v4 |= rss << __DW4(RSS_EN_0_B0_LBN);
+		break;
+	default:
+		EFHW_ASSERT(0);
+		break;
+	}
+
+	v3 = daddr_le32;
+	v2 = ((dport_le16 << __DW2(DEST_PORT_TCP_0_LBN)) |
+	      (__HIGH(saddr_le32, SRC_IP_0_LBN, SRC_IP_0_WIDTH)));
+	v1 = ((__LOW(saddr_le32, SRC_IP_0_LBN, SRC_IP_0_WIDTH)) |
+	      (sport_le16 << SRC_TCP_DEST_UDP_0_LBN));
+
+	*q0 = (v2 << 32) | v1;
+	*q1 = (v4 << 32) | v3;
+}
+
+
+static void build_filter(struct efhw_nic *nic, struct efhw_filter_spec *spec,
+			 unsigned *key, unsigned *tbl_size,
+			 struct efhw_filter_depth **depth,
+			 uint64_t *q0, uint64_t *q1)
+{
+	*key = falcon_hash_get_ip_key(spec->saddr_le32,
+				      spec->sport_le16,
+				      spec->daddr_le32,
+				      spec->dport_le16,
+				      spec->tcp,
+				      spec->full);
+	*tbl_size = nic->ip_filter_tbl_size;
+	if (spec->tcp && spec->full)
+		*depth = &nic->tcp_full_srch;
+	else if (spec->tcp && !spec->full)
+		*depth = &nic->tcp_wild_srch;
+	else if (!spec->tcp && spec->full)
+		*depth = &nic->udp_full_srch;
+	else
+		*depth = &nic->udp_wild_srch;
+	common_build_ip_filter(nic, spec->tcp, spec->full,
+			       spec->rss, spec->scatter,
+			       spec->dmaq_id,
+			       spec->saddr_le32,
+			       spec->sport_le16,
+			       spec->daddr_le32,
+			       spec->dport_le16,
+			       q0, q1);
+}
+
+
+#if FALCON_VERIFY_FILTERS
+static void verify_filters(struct efhw_nic *nic)
+{
+	unsigned table_offset, table_stride;
+	unsigned i, dummy_key, dummy_tbl_size;
+	struct efhw_filter_depth *dummy_depth;
+	unsigned filter_tbl_size;
+	struct efhw_filter_spec *spec;
+	uint64_t q0_expect, q1_expect, q0_got, q1_got;
+
+	filter_tbl_size = nic->ip_filter_tbl_size;
+	table_offset = RX_FILTER_TBL0_OFST;
+	table_stride = 2 * FALCON_REGISTER128;
+
+	for (i = 0; i < filter_tbl_size; i++) {
+		if (!filter_is_active(nic, i))
+			continue;
+
+		spec = filter_spec_cache_entry(nic, i);
+
+		build_filter(nic, spec, &dummy_key, &dummy_tbl_size,
+			     &dummy_depth, &q0_expect, &q1_expect);
+
+		falcon_read_qq(EFHW_KVA(nic) + table_offset + i * table_stride,
+			       &q0_got, &q1_got);
+
+		if ((q0_got != q0_expect) || (q1_got != q1_expect)) {
+			falcon_write_qq(EFHW_KVA(nic) + 0x300,
+					q0_got, q1_got);
+			EFHW_ERR("ERROR: RX-filter[%d][%d] was "
+				 "%"PRIx64":%"PRIx64" expected "
+				 "%"PRIx64":%"PRIx64,
+				 nic->index, i, q0_got, q1_got,
+				 q0_expect, q1_expect);
+		}
+	}
+}
+#endif
+
+
+static void write_filter_table_entry(struct efhw_nic *nic,
+				     unsigned filter_idx,
+				     uint64_t q0, uint64_t q1)
+{
+	unsigned table_offset, table_stride, offset;
+
+	EFHW_ASSERT(filter_idx < nic->ip_filter_tbl_size);
+	table_offset = RX_FILTER_TBL0_OFST;
+	table_stride = 2 * FALCON_REGISTER128;
+
+	offset = table_offset + filter_idx * table_stride;
+	falcon_write_qq(EFHW_KVA(nic) + offset, q0, q1);
+	mmiowb();
+
+#if FALCON_VERIFY_FILTERS
+	{
+		uint64_t q0read, q1read;
+
+		/* Read a different entry first - ensure BIU flushed shadow */
+		falcon_read_qq(EFHW_KVA(nic) + offset + 0x10, &q0read, &q1read);
+		falcon_read_qq(EFHW_KVA(nic) + offset, &q0read, &q1read);
+		EFHW_ASSERT(q0read == q0);
+		EFHW_ASSERT(q1read == q1);
+
+		verify_filters(nic);
+	}
+#endif
+}
+
+
+static int falcon_nic_filter_set(struct efhw_nic *nic,
+				 struct efhw_filter_spec *spec,
+				 int *filter_idx_out)
+{
+	FALCON_LOCK_DECL;
+	unsigned key = 0, tbl_size = 0, hash1, hash2, k;
+	struct efhw_filter_depth *depth = NULL;
+	int filter_idx = -1;
+	int rc = 0;
+	uint64_t q0, q1;
+
+	build_filter(nic, spec, &key, &tbl_size, &depth, &q0, &q1);
+
+	if (tbl_size == 0)
+		return -EINVAL;
+
+	EFHW_TRACE("%s: depth->max=%d", __func__, depth->max);
+
+	hash1 = falcon_hash_function1(key, tbl_size);
+	hash2 = falcon_hash_function2(key, tbl_size);
+
+	FALCON_LOCK_LOCK(nic);
+
+	for (k = 0; k < depth->max; k++) {
+		filter_idx = falcon_hash_iterator(hash1, hash2, k, tbl_size);
+		if (!filter_is_active(nic, filter_idx))
+			break;
+#if FALCON_FULL_FILTER_CACHE
+		if (filter_is_duplicate(nic, spec, filter_idx)) {
+			EFHW_WARN("%s: ERROR: duplicate filter (disabling "
+				  "interrupts)", __func__);
+			falcon_nic_interrupt_hw_disable(nic);
+			rc = -EINVAL;
+			goto fail1;
+		}
+#endif
+	}
+	if (k == depth->max) {
+		rc = -EADDRINUSE;
+		filter_idx = -1;
+		goto fail1;
+	} else if (depth->needed < (k + 1)) {
+		depth->needed = k + 1;
+	}
+
+	EFHW_ASSERT(filter_idx < (int)tbl_size);
+
+	set_filter_cache_entry(nic, spec, filter_idx);
+	write_filter_table_entry(nic, filter_idx, q0, q1);
+
+	++nic->ip_filter_tbl_used;
+
+	*filter_idx_out = filter_idx;
+
+	EFHW_TRACE("%s: filter index %d rxq %u set in %u",
+		   __func__, filter_idx, spec->dmaq_id, k);
+
+fail1:
+	FALCON_LOCK_UNLOCK(nic);
+	return rc;
+}
+
+
+static void falcon_nic_filter_clear(struct efhw_nic *nic,
+				    int filter_idx)
+{
+	FALCON_LOCK_DECL;
+
+	if (filter_idx < 0)
+		return;
+
+	FALCON_LOCK_LOCK(nic);
+	if (filter_is_active(nic, filter_idx)) {
+		if (--nic->ip_filter_tbl_used == 0) {
+			nic->tcp_full_srch.needed = 0;
+			nic->tcp_wild_srch.needed = 0;
+			nic->udp_full_srch.needed = 0;
+			nic->udp_wild_srch.needed = 0;
+		}
+	}
+	clear_filter_cache_entry(nic, filter_idx);
+	write_filter_table_entry(nic, filter_idx, 0, 0);
+	FALCON_LOCK_UNLOCK(nic);
+}
+
+
+int
+falcon_nic_filter_ctor(struct efhw_nic *nic)
+{
+	nic->ip_filter_tbl_size = 8 * 1024;
+	nic->ip_filter_tbl_used = 0;
+
+	nic->tcp_full_srch.needed = 0;
+	nic->tcp_full_srch.max = RX_FILTER_CTL_SRCH_LIMIT_TCP_FULL
+	    - RX_FILTER_CTL_SRCH_FUDGE_FULL;
+	nic->tcp_wild_srch.needed = 0;
+	nic->tcp_wild_srch.max = RX_FILTER_CTL_SRCH_LIMIT_TCP_WILD
+	    - RX_FILTER_CTL_SRCH_FUDGE_WILD;
+	nic->udp_full_srch.needed = 0;
+	nic->udp_full_srch.max = RX_FILTER_CTL_SRCH_LIMIT_UDP_FULL
+	    - RX_FILTER_CTL_SRCH_FUDGE_FULL;
+	nic->udp_wild_srch.needed = 0;
+	nic->udp_wild_srch.max = RX_FILTER_CTL_SRCH_LIMIT_UDP_WILD
+	    - RX_FILTER_CTL_SRCH_FUDGE_WILD;
+
+	nic->filter_in_use = vmalloc(FALCON_FILTER_TBL_NUM);
+	if (nic->filter_in_use == NULL)
+		return -ENOMEM;
+	memset(nic->filter_in_use, 0, FALCON_FILTER_TBL_NUM);
+#if FALCON_FULL_FILTER_CACHE
+	nic->filter_spec_cache = vmalloc(FALCON_FILTER_TBL_NUM
+					 * sizeof(struct efhw_filter_spec));
+	if (nic->filter_spec_cache == NULL)
+		return -ENOMEM;
+	memset(nic->filter_spec_cache, 0, FALCON_FILTER_TBL_NUM
+	       * sizeof(struct efhw_filter_spec));
+#endif
+
+	return 0;
+}
+
+
+void
+falcon_nic_filter_dtor(struct efhw_nic *nic)
+{
+#if FALCON_FULL_FILTER_CACHE
+	if (nic->filter_spec_cache)
+		vfree(nic->filter_spec_cache);
+#endif
+	if (nic->filter_in_use)
+		vfree(nic->filter_in_use);
+}
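[Editorial note: falcon_nic_filter_set()/falcon_nic_filter_clear() above are the whole of the new API: a caller describes a filter in a struct efhw_filter_spec and gets back a table index found by double-hash probing. A minimal caller sketch follows; it is illustrative only and not part of the patch -- in-tree callers reach these static functions through the efhw_func_ops table installed further below.]

	/* Steer full-match TCP 192.168.0.1:80 -> 10.0.0.1:8080 to RX queue 3.
	 * Address/port fields are little-endian, as filter_is_duplicate()
	 * and common_build_ip_filter() expect. */
	struct efhw_filter_spec spec;
	int filter_idx, rc;

	memset(&spec, 0, sizeof(spec));
	spec.dmaq_id = 3;
	spec.tcp = 1;
	spec.full = 1;			/* match all four address/port fields */
	spec.rss = 0;			/* B0-only feature; must be 0 on Falcon-A */
	spec.scatter = 0;
	spec.saddr_le32 = 0xc0a80001;	/* 192.168.0.1 */
	spec.sport_le16 = 80;
	spec.daddr_le32 = 0x0a000001;	/* 10.0.0.1 */
	spec.dport_le16 = 8080;

	rc = falcon_nic_filter_set(nic, &spec, &filter_idx);
	/* rc == -EADDRINUSE means the probe sequence hit depth->max
	 * occupied slots without finding a free entry. */
	if (rc == 0)
		falcon_nic_filter_clear(nic, filter_idx);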
+
+
+/*--------------------------------------------------------------------
+ *
+ * Compatibility with old filter API
+ *
+ *--------------------------------------------------------------------*/
+
+void
+falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full,
+			     uint32_t *tcp_wild,
+			     uint32_t *udp_full, uint32_t *udp_wild)
+{
+	struct efhw_filter_search_limits lim;
+
+	falcon_nic_get_rx_filter_search_limits(nic, &lim, 0);
+	*tcp_full = (uint32_t)lim.tcp_full;
+	*tcp_wild = (uint32_t)lim.tcp_wild;
+	*udp_full = (uint32_t)lim.udp_full;
+	*udp_wild = (uint32_t)lim.udp_wild;
+}
+EXPORT_SYMBOL(falcon_nic_rx_filter_ctl_get);
+
+
+void
+falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full,
+			     uint32_t tcp_wild,
+			     uint32_t udp_full, uint32_t udp_wild)
+{
+	struct efhw_filter_search_limits lim;
+
+	lim.tcp_full = (unsigned)tcp_full;
+	lim.tcp_wild = (unsigned)tcp_wild;
+	lim.udp_full = (unsigned)udp_full;
+	lim.udp_wild = (unsigned)udp_wild;
+	falcon_nic_set_rx_filter_search_limits(nic, &lim, 0);
+}
+EXPORT_SYMBOL(falcon_nic_rx_filter_ctl_set);
+
+
+static int
+falcon_nic_ipfilter_set(struct efhw_nic *nic, int type, int *_filter_idx,
+			int dmaq,
+			unsigned saddr_be32, unsigned sport_be16,
+			unsigned daddr_be32, unsigned dport_be16)
+{
+	struct efhw_filter_spec spec;
+
+	spec.dmaq_id = dmaq;
+	spec.saddr_le32 = ntohl(saddr_be32);
+	spec.daddr_le32 = ntohl(daddr_be32);
+	spec.sport_le16 = ntohs((unsigned short) sport_be16);
+	spec.dport_le16 = ntohs((unsigned short) dport_be16);
+	spec.tcp = ((type & EFHW_IP_FILTER_TYPE_TCP_MASK) != 0);
+	spec.full = ((type & EFHW_IP_FILTER_TYPE_FULL_MASK) != 0);
+	spec.rss = ((type & EFHW_IP_FILTER_TYPE_RSS_B0_MASK) != 0);
+	spec.scatter = ((type & EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK) == 0);
+	return falcon_nic_filter_set(nic, &spec, _filter_idx);
+}
+
+static void falcon_nic_ipfilter_clear(struct efhw_nic *nic, int filter_idx)
+{
+	falcon_nic_filter_clear(nic, filter_idx);
+}
+
+
+/*--------------------------------------------------------------------
+ *
+ * Abstraction Layer Hooks
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_func_ops falcon_char_functional_units = {
+	falcon_nic_close_hardware,
+	falcon_nic_init_hardware,
+	falcon_nic_interrupt,
+	falcon_nic_interrupt_enable,
+	falcon_nic_interrupt_disable,
+	falcon_nic_set_interrupt_moderation,
+	falcon_nic_event_queue_enable,
+	falcon_nic_event_queue_disable,
+	falcon_nic_wakeup_request,
+	falcon_nic_sw_event,
+	falcon_nic_ipfilter_set,
+	falcon_nic_ipfilter_clear,
+	falcon_dmaq_tx_q_init,
+	falcon_dmaq_rx_q_init,
+	falcon_dmaq_tx_q_disable,
+	falcon_dmaq_rx_q_disable,
+	falcon_flush_tx_dma_channel,
+	falcon_flush_rx_dma_channel,
+	falcon_nic_buffer_table_set,
+	falcon_nic_buffer_table_set_n,
+	falcon_nic_buffer_table_clear,
+	falcon_nic_buffer_table_commit,
+	falcon_nic_filter_set,
+	falcon_nic_filter_clear,
+};
+
+
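[Editorial note: falcon_char_functional_units uses positional initialisation, so its entries must stay in exactly the order struct efhw_func_ops declares them. Callers elsewhere in this patch (efhw_nic_dmaq_tx_q_init(), efhw_nic_event_queue_enable(), and friends) reach these functions only through the per-NIC ops pointer set up in efhw_nic_init() below. A hedged sketch of that indirection; the real member names live in the efhw headers, which this hunk does not show, so 'ipfilter_set' here is an assumption.]

	/* Illustrative only: how a wrapper might dispatch through the
	 * per-NIC vtable, assuming the member is named 'ipfilter_set'. */
	static inline int example_nic_ipfilter_set(struct efhw_nic *nic,
						   int type, int *filter_idx,
						   int dmaq,
						   unsigned saddr_be32,
						   unsigned sport_be16,
						   unsigned daddr_be32,
						   unsigned dport_be16)
	{
		return nic->efhw_func->ipfilter_set(nic, type, filter_idx,
						    dmaq, saddr_be32,
						    sport_be16, daddr_be32,
						    dport_be16);
	}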
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/nic.c
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/nic.c
@@ -0,0 +1,176 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains EtherFabric Generic NIC instance (init, interrupts,
+ * etc)
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+
+int efhw_device_type_init(struct efhw_device_type *dt,
+			  int vendor_id, int device_id,
+			  int class_revision)
+{
+	if (vendor_id != 0x1924)
+		return 0;
+
+	switch (device_id) {
+	case 0x0703:
+	case 0x6703:
+		dt->arch = EFHW_ARCH_FALCON;
+		dt->variant = 'A';
+		switch (class_revision) {
+		case 0:
+			dt->revision = 0;
+			break;
+		case 1:
+			dt->revision = 1;
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case 0x0710:
+		dt->arch = EFHW_ARCH_FALCON;
+		dt->variant = 'B';
+		switch (class_revision) {
+		case 2:
+			dt->revision = 0;
+			break;
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
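[Editorial note: efhw_device_type_init() recognises only Solarflare (PCI vendor 0x1924) Falcon parts: device IDs 0x0703/0x6703 are the 'A' variant (revisions 0 and 1) and 0x0710 is the 'B' variant (class revision 2 maps to revision 0). A hypothetical probe fragment showing the intended use -- the real caller is in the driver's PCI probe path, outside this hunk:]

	struct efhw_device_type dt;

	if (!efhw_device_type_init(&dt, 0x1924, 0x0710, 2))
		return -ENODEV;	/* not a supported Falcon */
	/* here dt.arch == EFHW_ARCH_FALCON, dt.variant == 'B',
	 * dt.revision == 0 */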
+
+
+/*--------------------------------------------------------------------
+ *
+ * NIC Initialisation
+ *
+ *--------------------------------------------------------------------*/
+
+/* make this separate from initialising data structure
+** to allow this to be called at a later time once we can access PCI
+** config space to find out what hardware we have
+*/
+void efhw_nic_init(struct efhw_nic *nic, unsigned flags, unsigned options,
+		   struct efhw_device_type dev_type)
+{
+	nic->devtype = dev_type;
+	nic->flags = flags;
+	nic->options = options;
+	nic->bar_ioaddr = 0;
+	spin_lock_init(&nic->the_reg_lock);
+	nic->reg_lock = &nic->the_reg_lock;
+	nic->mtu = 1500 + ETH_HLEN;
+
+	nic->irq_unit = EFHW_IRQ_UNIT_UNUSED;
+
+	nic->evq_sizes = 512 | 1024 | 2048 | 4096 | 8192 |
+	    16384 | 32768;
+	nic->txq_sizes = 512 | 1024 | 2048 | 4096;
+	nic->rxq_sizes = 512 | 1024 | 2048 | 4096;
+	nic->efhw_func = &falcon_char_functional_units;
+	nic->ctr_ap_bytes = EFHW_64M;
+	switch (nic->devtype.variant) {
+	case 'A':
+		nic->ctr_ap_bar = FALCON_S_CTR_AP_BAR;
+		nic->num_evqs = 4096;
+		nic->num_dmaqs = 4096;
+		nic->num_timers = 4096;
+		break;
+	case 'B':
+		nic->flags |= NIC_FLAG_NO_INTERRUPT;
+		nic->ctr_ap_bar = FALCON_P_CTR_AP_BAR;
+		nic->num_evqs = 4096;
+		nic->num_dmaqs = 4096;
+		nic->num_timers = 4096;
+		break;
+	default:
+		EFHW_ASSERT(0);
+		break;
+	}
+}
+
+
+void efhw_nic_close_interrupts(struct efhw_nic *nic)
+{
+	EFHW_ASSERT(nic);
+	if (!efhw_nic_have_hw(nic))
+		return;
+
+	EFHW_ASSERT(efhw_nic_have_hw(nic));
+
+	if (nic->irq_unit != EFHW_IRQ_UNIT_UNUSED)
+		efhw_nic_interrupt_disable(nic);
+}
+
+void efhw_nic_dtor(struct efhw_nic *nic)
+{
+	EFHW_ASSERT(nic);
+
+	/* Check that we have functional units because the software only
+	 * driver doesn't initialise anything hardware related any more */
+
+	/* close interrupts is called first because the act of deregistering
+	   the driver could cause this driver to change from master to slave
+	   and hence the implicit interrupt mappings would be wrong */
+
+	EFHW_TRACE("%s: functional units ... ", __func__);
+
+	if (efhw_nic_have_functional_units(nic)) {
+		efhw_nic_close_interrupts(nic);
+		efhw_nic_close_hardware(nic);
+	}
+	EFHW_TRACE("%s: functional units ... done", __func__);
+
+	/* destroy event queues */
+	EFHW_TRACE("%s: event queues ... ", __func__);
+
+	if (nic->interrupting_evq.evq_mask)
+		efhw_keventq_dtor(nic, &nic->interrupting_evq);
+	if (nic->non_interrupting_evq.evq_mask)
+		efhw_keventq_dtor(nic, &nic->non_interrupting_evq);
+
+	EFHW_TRACE("%s: event queues ... done", __func__);
+
+	spin_lock_destroy(&nic->the_reg_lock);
+
+	EFHW_TRACE("%s: DONE", __func__);
+}
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/kernel_compat.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/kernel_compat.h
@@ -0,0 +1,70 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides a compatibility layer for various Linux kernel versions
+ * (starting from the 2.6.9 RHEL kernel).
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef DRIVER_LINUX_RESOURCE_KERNEL_COMPAT_H
+#define DRIVER_LINUX_RESOURCE_KERNEL_COMPAT_H
+
+#include
+#include
+#include
+#include
+
+/********* pci_map_*() ********************/
+
+extern void *efrm_dma_alloc_coherent(struct device *dev, size_t size,
+				     dma_addr_t *dma_addr, int flag);
+
+extern void efrm_dma_free_coherent(struct device *dev, size_t size,
+				   void *ptr, dma_addr_t dma_addr);
+
+static inline void *efrm_pci_alloc_consistent(struct pci_dev *hwdev,
+					      size_t size,
+					      dma_addr_t *dma_addr)
+{
+	return efrm_dma_alloc_coherent(&hwdev->dev, size, dma_addr,
+				       GFP_ATOMIC);
+}
+
+static inline void efrm_pci_free_consistent(struct pci_dev *hwdev, size_t size,
+					    void *ptr, dma_addr_t dma_addr)
+{
+	efrm_dma_free_coherent(&hwdev->dev, size, ptr, dma_addr);
+}
+
+
+#endif /* DRIVER_LINUX_RESOURCE_KERNEL_COMPAT_H */
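[Editorial note: the efrm_pci_*_consistent() helpers simply forward to the efrm_dma_*_coherent() wrappers on &hwdev->dev, always with GFP_ATOMIC. A hedged usage sketch; the buffer size and variable names are invented for illustration:]

	/* Allocate a 4 KB coherent buffer, e.g. for a descriptor ring. */
	dma_addr_t ring_dma;
	void *ring = efrm_pci_alloc_consistent(pci_dev, 4096, &ring_dma);

	if (ring == NULL)
		return -ENOMEM;
	/* ... hand ring_dma to the NIC, touch 'ring' from the CPU ... */
	efrm_pci_free_consistent(pci_dev, 4096, ring, ring_dma);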
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/driver_object.c
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/driver_object.c
@@ -0,0 +1,328 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains support for the global driver variables.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "efrm_internal.h"
+
+/* We use #define rather than static inline here so that the Windows
+ * "prefast" compiler can see its own locking primitive when these
+ * two functions are used (and then perform extra checking where they
+ * are used)
+ *
+ * Both macros operate on an irq_flags_t
+*/
+
+#define efrm_driver_lock(irqlock_state) \
+	spin_lock_irqsave(&efrm_nic_tablep->lock, irqlock_state)
+
+#define efrm_driver_unlock(irqlock_state)		\
+	spin_unlock_irqrestore(&efrm_nic_tablep->lock,	\
+			       irqlock_state);
+
+/* These routines are all methods on the architecturally singleton
+   global variables: efrm_nic_table, efrm_rm_table.
+
+   I hope we never find a driver model that does not allow global
+   structure variables :) (but that would break almost every driver I've
+   ever seen).
+*/
+
+/*! Exported driver state */
+static struct efrm_nic_table efrm_nic_table;
+struct efrm_nic_table *efrm_nic_tablep;
+EXPORT_SYMBOL(efrm_nic_tablep);
+
+
+/* Internal table with resource managers.
+ * We'd like to not export it, but we are still using efrm_rm_table
+ * in the char driver.  So, it is declared in the private header with
+ * a purpose. */
+struct efrm_resource_manager *efrm_rm_table[EFRM_RESOURCE_NUM];
+EXPORT_SYMBOL(efrm_rm_table);
+
+
+/* List of registered nics. */
+static LIST_HEAD(efrm_nics);
+
+
+void efrm_driver_ctor(void)
+{
+	efrm_nic_tablep = &efrm_nic_table;
+	spin_lock_init(&efrm_nic_tablep->lock);
+	EFRM_TRACE("%s: driver created", __func__);
+}
+
+void efrm_driver_dtor(void)
+{
+	EFRM_ASSERT(!efrm_nic_table_held());
+
+	spin_lock_destroy(&efrm_nic_tablep->lock);
+	memset(&efrm_nic_table, 0, sizeof(efrm_nic_table));
+	memset(&efrm_rm_table, 0, sizeof(efrm_rm_table));
+	EFRM_TRACE("%s: driver deleted", __func__);
+}
+
+int efrm_driver_register_nic(struct efrm_nic *rnic, int nic_index,
+			     int ifindex)
+{
+	struct efhw_nic *nic = &rnic->efhw_nic;
+	struct efrm_nic_per_vi *vis;
+	int max_vis, rc = 0;
+	irq_flags_t lock_flags;
+
+	EFRM_ASSERT(nic_index >= 0);
+	EFRM_ASSERT(ifindex >= 0);
+
+	max_vis = 4096;		/* TODO: Get runtime value. */
+	vis = vmalloc(max_vis * sizeof(rnic->vis[0]));
+	if (vis == NULL) {
+		EFRM_ERR("%s: Out of memory", __func__);
+		return -ENOMEM;
+	}
+
+	efrm_driver_lock(lock_flags);
+
+	if (efrm_nic_table_held()) {
+		EFRM_ERR("%s: driver object is in use", __func__);
+		rc = -EBUSY;
+		goto done;
+	}
+
+	if (efrm_nic_tablep->nic_count == EFHW_MAX_NR_DEVS) {
+		EFRM_ERR("%s: filled up NIC table size %d", __func__,
+			 EFHW_MAX_NR_DEVS);
+		rc = -E2BIG;
+		goto done;
+	}
+
+	rnic->vis = vis;
+
+	EFRM_ASSERT(efrm_nic_tablep->nic[nic_index] == NULL);
+	efrm_nic_tablep->nic[nic_index] = nic;
+	nic->index = nic_index;
+	nic->ifindex = ifindex;
+
+	if (efrm_nic_tablep->a_nic == NULL)
+		efrm_nic_tablep->a_nic = nic;
+
+	efrm_nic_tablep->nic_count++;
+
+	INIT_LIST_HEAD(&rnic->clients);
+	list_add(&rnic->link, &efrm_nics);
+
+	efrm_driver_unlock(lock_flags);
+	return 0;
+
+done:
+	efrm_driver_unlock(lock_flags);
+	vfree(vis);
+	return rc;
+}
+
+int efrm_driver_unregister_nic(struct efrm_nic *rnic)
+{
+	struct efhw_nic *nic = &rnic->efhw_nic;
+	int rc = 0;
+	int nic_index = nic->index;
+	irq_flags_t lock_flags;
+
+	EFRM_ASSERT(nic_index >= 0);
+
+	efrm_driver_lock(lock_flags);
+
+	if (efrm_nic_table_held()) {
+		EFRM_ERR("%s: driver object is in use", __func__);
+		rc = -EBUSY;
+		goto done;
+	}
+	if (!list_empty(&rnic->clients)) {
+		EFRM_ERR("%s: nic has active clients", __func__);
+		rc = -EBUSY;
+		goto done;
+	}
+
+	EFRM_ASSERT(efrm_nic_tablep->nic[nic_index] == nic);
+	EFRM_ASSERT(list_empty(&rnic->clients));
+
+	list_del(&rnic->link);
+
+	nic->index = -1;
+	efrm_nic_tablep->nic[nic_index] = NULL;
+
+	--efrm_nic_tablep->nic_count;
+
+	if (efrm_nic_tablep->a_nic == nic) {
+		if (efrm_nic_tablep->nic_count == 0) {
+			efrm_nic_tablep->a_nic = NULL;
+		} else {
+			for (nic_index = 0; nic_index < EFHW_MAX_NR_DEVS;
+			     nic_index++) {
+				if (efrm_nic_tablep->nic[nic_index] != NULL)
+					efrm_nic_tablep->a_nic =
+					    efrm_nic_tablep->nic[nic_index];
+			}
+			EFRM_ASSERT(efrm_nic_tablep->a_nic);
+		}
+	}
+
+done:
+	efrm_driver_unlock(lock_flags);
+	return rc;
+}
+
+
+int efrm_nic_pre_reset(struct efhw_nic *nic)
+{
+	struct efrm_nic *rnic = efrm_nic(nic);
+	struct efrm_client *client;
+	struct efrm_resource *rs;
+	struct list_head *client_link;
+	struct list_head *rs_link;
+	irq_flags_t lock_flags;
+
+	spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags);
+	list_for_each(client_link, &rnic->clients) {
+		client = container_of(client_link, struct efrm_client, link);
+		EFRM_ERR("%s: client %p", __func__, client);
+		if (client->callbacks->pre_reset)
+			client->callbacks->pre_reset(client, client->user_data);
+		list_for_each(rs_link, &client->resources) {
+			rs = container_of(rs_link, struct efrm_resource,
+					  rs_client_link);
+			EFRM_ERR("%s: resource %p", __func__, rs);
+			/* TODO: mark rs defunct */
+		}
+	}
+	spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags);
+
+	return 0;
+}
+
+
+int efrm_nic_stop(struct efhw_nic *nic)
+{
+	/* TODO */
+	return 0;
+}
+
+
+int efrm_nic_resume(struct efhw_nic *nic)
+{
+	/* TODO */
+	return 0;
+}
+
+
+static void efrm_client_nullcb(struct efrm_client *client, void *user_data)
+{
+}
+
+static struct efrm_client_callbacks efrm_null_callbacks = {
+	efrm_client_nullcb,
+	efrm_client_nullcb,
+	efrm_client_nullcb
+};
+
+
+int efrm_client_get(int ifindex, struct efrm_client_callbacks *callbacks,
+		    void *user_data, struct efrm_client **client_out)
+{
+	struct efrm_nic *n, *rnic = NULL;
+	irq_flags_t lock_flags;
+	struct list_head *link;
+	struct efrm_client *client;
+
+	if (callbacks == NULL)
+		callbacks = &efrm_null_callbacks;
+
+	client = kmalloc(sizeof(*client), GFP_KERNEL);
+	if (client == NULL)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags);
+	list_for_each(link, &efrm_nics) {
+		n = container_of(link, struct efrm_nic, link);
+		if (n->efhw_nic.ifindex == ifindex || ifindex < 0) {
+			rnic = n;
+			break;
+		}
+	}
+	if (rnic) {
+		client->user_data = user_data;
+		client->callbacks = callbacks;
+		client->nic = &rnic->efhw_nic;
+		client->ref_count = 1;
+		INIT_LIST_HEAD(&client->resources);
+		list_add(&client->link, &rnic->clients);
+	}
+	spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags);
+
+	if (rnic == NULL) {
+		kfree(client);
+		return -ENODEV;
+	}
+
+	*client_out = client;
+	return 0;
+}
+EXPORT_SYMBOL(efrm_client_get);
+
+
+void efrm_client_put(struct efrm_client *client)
+{
+	irq_flags_t lock_flags;
+
+	EFRM_ASSERT(client->ref_count > 0);
+
+	spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags);
+	if (--client->ref_count > 0)
+		client = NULL;
+	else
+		list_del(&client->link);
+	spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags);
+	kfree(client);
+}
+EXPORT_SYMBOL(efrm_client_put);
+
+
+struct efhw_nic *efrm_client_get_nic(struct efrm_client *client)
+{
+	return client->nic;
+}
+EXPORT_SYMBOL(efrm_client_get_nic);
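[Editorial note: efrm_client_get()/efrm_client_put() hand out refcounted handles onto a registered NIC; a negative ifindex binds to the first NIC on the list, and a NULL callbacks pointer selects the no-op efrm_null_callbacks. A hedged usage sketch, with the ifindex value invented for illustration:]

	struct efrm_client *client;
	struct efhw_nic *nic;
	int rc;

	rc = efrm_client_get(2 /* ifindex */, NULL /* default callbacks */,
			     NULL /* user_data */, &client);
	if (rc < 0)
		return rc;	/* -ENODEV: no matching NIC registered */
	nic = efrm_client_get_nic(client);
	/* ... allocate VI resources against 'client' ... */
	efrm_client_put(client);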
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/vi_resource_alloc.c
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/vi_resource_alloc.c
@@ -0,0 +1,820 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains allocation of VI resources.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "efrm_internal.h"
+
+
+/*** Data definitions ****************************************************/
+
+static const char *dmaq_names[] = { "TX", "RX" };
+
+struct vi_resource_manager *efrm_vi_manager;
+
+/*** Forward references **************************************************/
+
+static int
+efrm_vi_resource_alloc_or_free(struct efrm_client *client,
+			       int alloc, struct vi_resource *evq_virs,
+			       uint16_t vi_flags, int32_t evq_capacity,
+			       int32_t txq_capacity, int32_t rxq_capacity,
+			       uint8_t tx_q_tag, uint8_t rx_q_tag,
+			       struct vi_resource **virs_in_out);
+
+/*** Reference count handling ********************************************/
+
+static inline void efrm_vi_rm_get_ref(struct vi_resource *virs)
+{
+	atomic_inc(&virs->evq_refs);
+}
+
+static inline void efrm_vi_rm_drop_ref(struct vi_resource *virs)
+{
+	EFRM_ASSERT(atomic_read(&virs->evq_refs) != 0);
+	if (atomic_dec_and_test(&virs->evq_refs))
+		efrm_vi_resource_alloc_or_free(virs->rs.rs_client, false, NULL,
+					       0, 0, 0, 0, 0, 0, &virs);
+}
+
+/*** Instance numbers ****************************************************/
+
+static inline int efrm_vi_rm_alloc_id(uint16_t vi_flags, int32_t evq_capacity)
+{
+	irq_flags_t lock_flags;
+	int instance;
+	int rc;
+
+	if (efrm_nic_tablep->a_nic == NULL)	/* ?? FIXME: surely not right */
+		return -ENODEV;
+
+	spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
+
+	/* Falcon A1 RX phys addr weirdness. */
+	if (efrm_nic_tablep->a_nic->devtype.variant == 'A' &&
+	    (vi_flags & EFHW_VI_RX_PHYS_ADDR_EN)) {
+		if (vi_flags & EFHW_VI_JUMBO_EN) {
+			/* Falcon-A cannot do phys + scatter. */
+			EFRM_WARN
+			    ("%s: falcon-A does not support phys+scatter mode",
+			     __func__);
+			instance = -1;
+		} else if (efrm_vi_manager->iscsi_dmaq_instance_is_free
+			   && evq_capacity == 0) {
+			/* Falcon-A has a single RXQ that gives the correct
+			 * semantics for physical addressing.  However, it
+			 * happens to have the same instance number as the
+			 * 'char' event queue, so we cannot also hand out
+			 * the event queue. */
+			efrm_vi_manager->iscsi_dmaq_instance_is_free = false;
+			instance = FALCON_A1_ISCSI_DMAQ;
+		} else {
+			EFRM_WARN("%s: iSCSI receive queue not free",
+				  __func__);
+			instance = -1;
+		}
+		goto unlock_out;
+	}
+
+	if (vi_flags & EFHW_VI_RM_WITH_INTERRUPT) {
+		rc = __kfifo_get(efrm_vi_manager->instances_with_interrupt,
+				 (unsigned char *)&instance, sizeof(instance));
+		if (rc != sizeof(instance)) {
+			EFRM_ASSERT(rc == 0);
+			instance = -1;
+		}
+		goto unlock_out;
+	}
+
+	/* Otherwise a normal run-of-the-mill VI. */
+	rc = __kfifo_get(efrm_vi_manager->instances_with_timer,
+			 (unsigned char *)&instance, sizeof(instance));
+	if (rc != sizeof(instance)) {
+		EFRM_ASSERT(rc == 0);
+		instance = -1;
+	}
+
+unlock_out:
+	spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags);
+	return instance;
+}
+
+static void efrm_vi_rm_free_id(int instance)
+{
+	irq_flags_t lock_flags;
+	struct kfifo *instances;
+
+	if (efrm_nic_tablep->a_nic == NULL)	/* ?? FIXME: surely not right */
+		return;
+
+	if (efrm_nic_tablep->a_nic->devtype.variant == 'A' &&
+	    instance == FALCON_A1_ISCSI_DMAQ) {
+		EFRM_ASSERT(efrm_vi_manager->iscsi_dmaq_instance_is_free ==
+			    false);
+		spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
+		efrm_vi_manager->iscsi_dmaq_instance_is_free = true;
+		spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock,
+				       lock_flags);
+	} else {
+		if (instance >= efrm_vi_manager->with_timer_base &&
+		    instance < efrm_vi_manager->with_timer_limit) {
+			instances = efrm_vi_manager->instances_with_timer;
+		} else {
+			EFRM_ASSERT(instance >=
+				    efrm_vi_manager->with_interrupt_base);
+			EFRM_ASSERT(instance <
+				    efrm_vi_manager->with_interrupt_limit);
+			instances = efrm_vi_manager->instances_with_interrupt;
+		}
+
+		EFRM_VERIFY_EQ(kfifo_put(instances, (unsigned char *)&instance,
+					 sizeof(instance)), sizeof(instance));
+	}
+}
+
+/*** Queue sizes *********************************************************/
+
+/* NB. This should really take a nic as an argument, but that makes
+ * the buffer table allocation difficult. */
+uint32_t efrm_vi_rm_evq_bytes(struct vi_resource *virs
+			      /*,struct efhw_nic *nic */)
+{
+	return virs->evq_capacity * sizeof(efhw_event_t);
+}
+EXPORT_SYMBOL(efrm_vi_rm_evq_bytes);
+
+/* NB. This should really take a nic as an argument, but that makes
+ * the buffer table allocation difficult. */
+uint32_t efrm_vi_rm_txq_bytes(struct vi_resource *virs
+			      /*,struct efhw_nic *nic */)
+{
+	return virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] *
+	    FALCON_DMA_TX_DESC_BYTES;
+}
+EXPORT_SYMBOL(efrm_vi_rm_txq_bytes);
+
+/* NB. This should really take a nic as an argument, but that makes
+ * the buffer table allocation difficult. */
+uint32_t efrm_vi_rm_rxq_bytes(struct vi_resource *virs
+			      /*,struct efhw_nic *nic */)
+{
+	uint32_t bytes_per_desc = ((virs->flags & EFHW_VI_RX_PHYS_ADDR_EN)
+				   ? FALCON_DMA_RX_PHYS_DESC_BYTES
+				   : FALCON_DMA_RX_BUF_DESC_BYTES);
+	return virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX] * bytes_per_desc;
+}
+EXPORT_SYMBOL(efrm_vi_rm_rxq_bytes);
+
+static int choose_size(int size_rq, unsigned sizes)
+{
+	int size;
+
+	/* size_rq < 0 means default, but we interpret this as 'minimum'. */
+
+	for (size = 256;; size <<= 1)
+		if ((size & sizes) && size >= size_rq)
+			return size;
+		else if ((sizes & ~((size - 1) | size)) == 0)
+			return -1;
+}
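[Editorial note: choose_size() walks powers of two from 256 upwards, returning the first supported size at least as large as the request, or -1 once no supported size remains above the current one. Worked examples against the TX queue sizes set in efhw_nic_init() above (512|1024|2048|4096):]

	choose_size(600, 512 | 1024 | 2048 | 4096);	/* == 1024 */
	choose_size(-1, 512 | 1024 | 2048 | 4096);	/* == 512: 'default' is the minimum */
	choose_size(5000, 512 | 1024 | 2048 | 4096);	/* == -1: caller reports -E2BIG */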
+
+static int
+efrm_vi_rm_adjust_alloc_request(struct vi_resource *virs, struct efhw_nic *nic)
+{
+	int capacity;
+
+	EFRM_ASSERT(nic->efhw_func);
+
+	if (virs->evq_capacity) {
+		capacity = choose_size(virs->evq_capacity, nic->evq_sizes);
+		if (capacity < 0) {
+			EFRM_ERR("vi_resource: bad evq size %d (supported=%x)",
+				 virs->evq_capacity, nic->evq_sizes);
+			return -E2BIG;
+		}
+		virs->evq_capacity = capacity;
+	}
+	if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX]) {
+		capacity =
+		    choose_size(virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX],
+				nic->txq_sizes);
+		if (capacity < 0) {
+			EFRM_ERR("vi_resource: bad txq size %d (supported=%x)",
+				 virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX],
+				 nic->txq_sizes);
+			return -E2BIG;
+		}
+		virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] = capacity;
+	}
+	if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]) {
+		capacity =
+		    choose_size(virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX],
+				nic->rxq_sizes);
+		if (capacity < 0) {
+			EFRM_ERR("vi_resource: bad rxq size %d (supported=%x)",
+				 virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX],
+				 nic->rxq_sizes);
+			return -E2BIG;
+		}
+		virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX] = capacity;
+	}
+
+	return 0;
+}
+
+/* remove the reference to the event queue in this VI resource and decrement
+   the event queue's use count */
+static inline void efrm_vi_rm_detach_evq(struct vi_resource *virs)
+{
+	struct vi_resource *evq_virs;
+
+	EFRM_ASSERT(virs != NULL);
+
+	evq_virs = virs->evq_virs;
+
+	if (evq_virs != NULL) {
+		virs->evq_virs = NULL;
+		if (evq_virs == virs) {
+			EFRM_TRACE("%s: " EFRM_RESOURCE_FMT
+				   " had internal event queue ", __func__,
+				   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle));
+		} else {
+			efrm_vi_rm_drop_ref(evq_virs);
+			EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " had event queue "
+				   EFRM_RESOURCE_FMT, __func__,
+				   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle),
+				   EFRM_RESOURCE_PRI_ARG(evq_virs->rs.rs_handle));
+		}
+	} else {
+		EFRM_TRACE("%s: " EFRM_RESOURCE_FMT
+			   " had no event queue (nothing to do)",
+			   __func__,
+			   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle));
+	}
+}
+
+/*** Buffer Table allocations ********************************************/
+
+static int
+efrm_vi_rm_alloc_or_free_buffer_table(struct vi_resource *virs, bool is_alloc)
+{
+	uint32_t bytes;
+	int page_order;
+	int rc;
+
+	if (!is_alloc)
+		goto destroy;
+
+	if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX]) {
+		bytes = efrm_vi_rm_txq_bytes(virs);
+		page_order = get_order(bytes);
+		rc = efrm_buffer_table_alloc(page_order,
+					     (virs->dmaq_buf_tbl_alloc +
+					      EFRM_VI_RM_DMA_QUEUE_TX));
+		if (rc != 0) {
+			EFRM_TRACE
+			    ("%s: Error %d allocating TX buffer table entry",
+			     __func__, rc);
+			goto fail_txq_alloc;
+		}
+	}
+
+	if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]) {
+		bytes = efrm_vi_rm_rxq_bytes(virs);
+		page_order = get_order(bytes);
+		rc = efrm_buffer_table_alloc(page_order,
+					     (virs->dmaq_buf_tbl_alloc +
+					      EFRM_VI_RM_DMA_QUEUE_RX));
+		if (rc != 0) {
+			EFRM_TRACE
+			    ("%s: Error %d allocating RX buffer table entry",
+			     __func__, rc);
+			goto fail_rxq_alloc;
+		}
+	}
+	return 0;
+
+destroy:
+	rc = 0;
+
+	if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]) {
+		efrm_buffer_table_free(&virs->
+				       dmaq_buf_tbl_alloc
+				       [EFRM_VI_RM_DMA_QUEUE_RX]);
+	}
+fail_rxq_alloc:
+
+	if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX]) {
+		efrm_buffer_table_free(&virs->
+				       dmaq_buf_tbl_alloc
+				       [EFRM_VI_RM_DMA_QUEUE_TX]);
+	}
+fail_txq_alloc:
+
+	return rc;
+}
+
+/*** Per-NIC allocations *************************************************/
+
+static inline int
+efrm_vi_rm_init_evq(struct vi_resource *virs, struct efhw_nic *nic)
+{
+	int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+	struct eventq_resource_hardware *evq_hw =
+	    &virs->nic_info.evq_pages;
+	uint32_t buf_bytes = efrm_vi_rm_evq_bytes(virs);
+	int rc;
+
+	if (virs->evq_capacity == 0)
+		return 0;
+	evq_hw->capacity = virs->evq_capacity;
+
+	/* Allocate buffer table entries to map onto the iobuffer.  This
+	 * currently allocates its own buffer table entries on Falcon which is
+	 * a bit wasteful on a multi-NIC system. */
+	evq_hw->buf_tbl_alloc.base = (unsigned)-1;
+	rc = efrm_buffer_table_alloc(get_order(buf_bytes),
+				     &evq_hw->buf_tbl_alloc);
+	if (rc < 0) {
+		EFHW_WARN("%s: failed (%d) to alloc %d buffer table entries",
+			  __func__, rc, get_order(buf_bytes));
+		return rc;
+	}
+
+	/* Allocate the event queue memory. */
+	rc = efhw_nic_event_queue_alloc_iobuffer(nic, evq_hw, instance,
+						 buf_bytes);
+	if (rc != 0) {
+		EFRM_ERR("%s: Error allocating iobuffer: %d", __func__, rc);
+		efrm_buffer_table_free(&evq_hw->buf_tbl_alloc);
+		return rc;
+	}
+
+	/* Initialise the event queue hardware */
+	efhw_nic_event_queue_enable(nic, instance, virs->evq_capacity,
+				    efhw_iopages_dma_addr(&evq_hw->iobuff) +
+				    evq_hw->iobuff_off,
+				    evq_hw->buf_tbl_alloc.base,
+				    instance < 64);
+
+	EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " capacity=%u", __func__,
+		   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle),
+		   virs->evq_capacity);
+
+#if defined(__ia64__)
+	/* Page size may be large, so for now just increase the
+	 * size of the requested evq up to a round number of
+	 * pages
+	 */
+	buf_bytes = CI_ROUNDUP(buf_bytes, PAGE_SIZE);
+#endif
+	EFRM_ASSERT(buf_bytes % PAGE_SIZE == 0);
+
+	virs->mem_mmap_bytes += buf_bytes;
+
+	return 0;
+}
+
+static inline void
+efrm_vi_rm_fini_evq(struct vi_resource *virs, struct efhw_nic *nic)
+{
+	int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+	struct vi_resource_nic_info *nic_info = &virs->nic_info;
+
+	if (virs->evq_capacity == 0)
+		return;
+
+	/* Zero the timer-value for this queue, and tell the NIC to stop
+	 * using this event queue. */
+	efhw_nic_event_queue_disable(nic, instance, 0);
+
+	if (nic_info->evq_pages.buf_tbl_alloc.base != (unsigned)-1)
+		efrm_buffer_table_free(&nic_info->evq_pages.buf_tbl_alloc);
+
+	efhw_iopages_free(nic, &nic_info->evq_pages.iobuff);
+}
+
+/*! FIXME: we should make sure this number is never zero (=> unprotected) */
+/*! FIXME: put this definition in a relevant header (e.g. as (evqid)+1) */
+#define EFAB_EVQ_OWNER_ID(evqid) ((evqid))
+
+void
+efrm_vi_rm_init_dmaq(struct vi_resource *virs, int queue_type,
+		     struct efhw_nic *nic)
+{
+	int instance;
+	int evq_instance;
+	efhw_buffer_addr_t buf_addr;
+
+	instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+	evq_instance = EFRM_RESOURCE_INSTANCE(virs->evq_virs->rs.rs_handle);
+
+	buf_addr = virs->dmaq_buf_tbl_alloc[queue_type].base;
+
+	if (queue_type == EFRM_VI_RM_DMA_QUEUE_TX) {
+		efhw_nic_dmaq_tx_q_init(nic,
+			instance,			/* dmaq */
+			evq_instance,			/* evq */
+			EFAB_EVQ_OWNER_ID(evq_instance), /* owner */
+			virs->dmaq_tag[queue_type],	/* tag */
+			virs->dmaq_capacity[queue_type], /* size of queue */
+			buf_addr,			/* buffer index */
+			virs->flags);	/* user specified Q attrs */
+	} else {
+		efhw_nic_dmaq_rx_q_init(nic,
+			instance,			/* dmaq */
+			evq_instance,			/* evq */
+			EFAB_EVQ_OWNER_ID(evq_instance), /* owner */
+			virs->dmaq_tag[queue_type],	/* tag */
+			virs->dmaq_capacity[queue_type], /* size of queue */
+			buf_addr,			/* buffer index */
+			virs->flags);	/* user specified Q attrs */
+	}
+}
+
+static int
+efrm_vi_rm_init_or_fini_dmaq(struct vi_resource *virs,
+			     int queue_type, int init,
+			     struct efhw_nic *nic)
+{
+	int rc;
+	int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+	uint32_t buf_bytes;
+	struct vi_resource_nic_info *nic_info = &virs->nic_info;
+	int page_order;
+	uint32_t num_pages;
+	struct efhw_iopages *iobuff;
+
+	if (!init)
+		goto destroy;
+
+	/* Ignore disabled queues. */
+	if (virs->dmaq_capacity[queue_type] == 0) {
+		if (queue_type == EFRM_VI_RM_DMA_QUEUE_TX)
+			efhw_nic_dmaq_tx_q_disable(nic, instance);
+		else
+			efhw_nic_dmaq_rx_q_disable(nic, instance);
+		return 0;
+	}
+
+	buf_bytes = (queue_type == EFRM_VI_RM_DMA_QUEUE_TX
+		     ? efrm_vi_rm_txq_bytes(virs)
+		     : efrm_vi_rm_rxq_bytes(virs));
+
+	page_order = get_order(buf_bytes);
+
+	rc = efhw_iopages_alloc(nic, &nic_info->dmaq_pages[queue_type],
+				page_order);
+	if (rc != 0) {
+		EFRM_ERR("%s: Failed to allocate %s DMA buffer.", __func__,
+			 dmaq_names[queue_type]);
+		goto fail_iopages;
+	}
+
+	num_pages = 1 << page_order;
+	iobuff = &nic_info->dmaq_pages[queue_type];
+	efhw_nic_buffer_table_set_n(nic,
+				    virs->dmaq_buf_tbl_alloc[queue_type].base,
+				    efhw_iopages_dma_addr(iobuff),
+				    EFHW_NIC_PAGE_SIZE, 0, num_pages, 0);
+
+	falcon_nic_buffer_table_confirm(nic);
+
+	virs->mem_mmap_bytes += roundup(buf_bytes, PAGE_SIZE);
+
+	/* Make sure there is an event queue. */
+	if (virs->evq_virs->evq_capacity <= 0) {
+		EFRM_ERR("%s: Cannot use empty event queue for %s DMA",
+			 __func__, dmaq_names[queue_type]);
+		rc = -EINVAL;
+		goto fail_evq;
+	}
+
+	efrm_vi_rm_init_dmaq(virs, queue_type, nic);
+
+	return 0;
+
+destroy:
+	rc = 0;
+
+	/* Ignore disabled queues. */
+	if (virs->dmaq_capacity[queue_type] == 0)
+		return 0;
+
+	/* Ensure TX pacing turned off -- queue flush doesn't reset this. */
+	if (queue_type == EFRM_VI_RM_DMA_QUEUE_TX)
+		falcon_nic_pace(nic, instance, 0);
+
+	/* No need to disable the queue here.  Nobody is using it anyway. */
+
+fail_evq:
+	efhw_iopages_free(nic, &nic_info->dmaq_pages[queue_type]);
+fail_iopages:
+
+	return rc;
+}
+
+static int
+efrm_vi_rm_init_or_fini_nic(struct vi_resource *virs, int init,
+			    struct efhw_nic *nic)
+{
+	int rc;
+#ifndef NDEBUG
+	int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+#endif
+
+	if (!init)
+		goto destroy;
+
+	rc = efrm_vi_rm_init_evq(virs, nic);
+	if (rc != 0)
+		goto fail_evq;
+
+	rc = efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_TX,
+					  init, nic);
+	if (rc != 0)
+		goto fail_txq;
+
+	rc = efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_RX,
+					  init, nic);
+	if (rc != 0)
+		goto fail_rxq;
+
+	/* Allocate space for the control page. */
+	EFRM_ASSERT(falcon_tx_dma_page_offset(instance) < PAGE_SIZE);
+	EFRM_ASSERT(falcon_rx_dma_page_offset(instance) < PAGE_SIZE);
+	EFRM_ASSERT(falcon_timer_page_offset(instance) < PAGE_SIZE);
+	virs->bar_mmap_bytes += PAGE_SIZE;
+
+	return 0;
+
+destroy:
+	rc = 0;
+
+	efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_RX,
+				     false, nic);
+fail_rxq:
+
+	efrm_vi_rm_init_or_fini_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_TX,
+				     false, nic);
+fail_txq:
+
+	efrm_vi_rm_fini_evq(virs, nic);
+fail_evq:
+
+	EFRM_ASSERT(rc != 0 || !init);
+	return rc;
+}
+
+static int
+efrm_vi_resource_alloc_or_free(struct efrm_client *client,
+			       int alloc, struct vi_resource *evq_virs,
+			       uint16_t vi_flags, int32_t evq_capacity,
+			       int32_t txq_capacity, int32_t rxq_capacity,
+			       uint8_t tx_q_tag, uint8_t rx_q_tag,
+			       struct vi_resource **virs_in_out)
+{
+	struct efhw_nic *nic = client->nic;
+	struct vi_resource *virs;
+	int rc;
+	int instance;
+
+	EFRM_ASSERT(virs_in_out);
+	EFRM_ASSERT(efrm_vi_manager);
+	EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_vi_manager->rm);
+
+	if (!alloc)
+		goto destroy;
+
+	tx_q_tag &= (1 << TX_DESCQ_LABEL_WIDTH) - 1;
+	rx_q_tag &= (1 << RX_DESCQ_LABEL_WIDTH) - 1;
+
+	virs = kmalloc(sizeof(*virs), GFP_KERNEL);
+	if (virs == NULL) {
+		EFRM_ERR("%s: Error allocating VI resource object",
+			 __func__);
+		rc = -ENOMEM;
+		goto fail_alloc;
+	}
+	memset(virs, 0, sizeof(*virs));
+
+	/* Some macros make the assumption that the struct efrm_resource is
+	 * the first member of a struct vi_resource. */
+	EFRM_ASSERT(&virs->rs == (struct efrm_resource *) (virs));
+
+	instance = efrm_vi_rm_alloc_id(vi_flags, evq_capacity);
+	if (instance < 0) {
+		/* Clear out the close list... */
+		efrm_vi_rm_salvage_flushed_vis();
+		instance = efrm_vi_rm_alloc_id(vi_flags, evq_capacity);
+		if (instance >= 0)
+			EFRM_TRACE("%s: Salvaged a closed VI.", __func__);
+	}
+
+	if (instance < 0) {
+		/* Could flush resources and try again here. */
+		EFRM_ERR("%s: Out of appropriate VI resources", __func__);
+		rc = -EBUSY;
+		goto fail_alloc_id;
+	}
+
+	EFRM_TRACE("%s: new VI ID %d", __func__, instance);
+	efrm_resource_init(&virs->rs, EFRM_RESOURCE_VI, instance);
+
+	/* Start with one reference.  Any external VIs using the EVQ of this
+	 * resource will increment this reference rather than the resource
+	 * reference to avoid DMAQ flushes from waiting for other DMAQ
+	 * flushes to complete.  When the resource reference goes to zero,
+	 * the DMAQ flush happens.  When the flush completes, this reference
+	 * is decremented.  When this reference reaches zero, the instance
+	 * is freed. */
+	atomic_set(&virs->evq_refs, 1);
+
+	virs->bar_mmap_bytes = 0;
+	virs->mem_mmap_bytes = 0;
+	virs->evq_capacity = evq_capacity;
+	virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] = txq_capacity;
+	virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX] = rxq_capacity;
+	virs->dmaq_tag[EFRM_VI_RM_DMA_QUEUE_TX] = tx_q_tag;
+	virs->dmaq_tag[EFRM_VI_RM_DMA_QUEUE_RX] = rx_q_tag;
+	virs->flags = vi_flags;
+	INIT_LIST_HEAD(&virs->tx_flush_link);
+	INIT_LIST_HEAD(&virs->rx_flush_link);
+	virs->tx_flushing = 0;
+	virs->rx_flushing = 0;
+
+	/* Adjust the queue sizes. */
+	rc = efrm_vi_rm_adjust_alloc_request(virs, nic);
+	if (rc != 0)
+		goto fail_adjust_request;
+
+	/* Attach the EVQ early so that we can ensure that the NIC sets
+	 * match. */
+	if (evq_virs == NULL) {
+		evq_virs = virs;
+		EFRM_TRACE("%s: " EFRM_RESOURCE_FMT
+			   " has no external event queue", __func__,
+			   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle));
+	} else {
+		/* Make sure the resource managers are the same. */
+		if (EFRM_RESOURCE_TYPE(evq_virs->rs.rs_handle) !=
+		    EFRM_RESOURCE_VI) {
+			EFRM_ERR("%s: Mismatched owner for event queue VI "
+				 EFRM_RESOURCE_FMT, __func__,
+				 EFRM_RESOURCE_PRI_ARG(evq_virs->rs.rs_handle));
+			return -EINVAL;
+		}
+		EFRM_ASSERT(atomic_read(&evq_virs->evq_refs) != 0);
+		efrm_vi_rm_get_ref(evq_virs);
+		EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " uses event queue "
+			   EFRM_RESOURCE_FMT,
+			   __func__,
+			   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle),
+			   EFRM_RESOURCE_PRI_ARG(evq_virs->rs.rs_handle));
+	}
+	virs->evq_virs = evq_virs;
+
+	rc = efrm_vi_rm_alloc_or_free_buffer_table(virs, true);
+	if (rc != 0)
+		goto fail_buffer_table;
+
+	rc = efrm_vi_rm_init_or_fini_nic(virs, true, nic);
+	if (rc != 0)
+		goto fail_init_nic;
+
+	efrm_client_add_resource(client, &virs->rs);
+	*virs_in_out = virs;
+	EFRM_TRACE("%s: Allocated " EFRM_RESOURCE_FMT, __func__,
+		   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle));
+	return 0;
+
+destroy:
+	virs = *virs_in_out;
+	EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 1);
+	instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+
+	EFRM_TRACE("%s: Freeing %d", __func__,
+		   EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle));
+
+	/* Destroying the VI.  The reference count must be zero. */
+	EFRM_ASSERT(atomic_read(&virs->evq_refs) == 0);
+
+	/* The EVQ should have gone (and DMA disabled) so that this
+	 * function can't be re-entered to destroy the EVQ VI. */
+	EFRM_ASSERT(virs->evq_virs == NULL);
+	rc = 0;
+
+fail_init_nic:
+	efrm_vi_rm_init_or_fini_nic(virs, false, nic);
+
+	efrm_vi_rm_alloc_or_free_buffer_table(virs, false);
+fail_buffer_table:
+
+	efrm_vi_rm_detach_evq(virs);
+
+fail_adjust_request:
+
+	EFRM_ASSERT(virs->evq_callback_fn == NULL);
+	EFRM_TRACE("%s: delete VI ID %d", __func__, instance);
+	efrm_vi_rm_free_id(instance);
+fail_alloc_id:
+	if (!alloc)
+		efrm_client_put(virs->rs.rs_client);
+	EFRM_DO_DEBUG(memset(virs, 0, sizeof(*virs)));
+	kfree(virs);
+fail_alloc:
+	*virs_in_out = NULL;
+
+	return rc;
+}
+
+/*** Resource object ****************************************************/
+
+int
+efrm_vi_resource_alloc(struct efrm_client *client,
+		       struct vi_resource *evq_virs,
+		       uint16_t vi_flags, int32_t evq_capacity,
+		       int32_t txq_capacity, int32_t rxq_capacity,
+		       uint8_t tx_q_tag, uint8_t rx_q_tag,
+		       struct vi_resource **virs_out,
+		       uint32_t *out_io_mmap_bytes,
+		       uint32_t *out_mem_mmap_bytes,
+		       uint32_t *out_txq_capacity, uint32_t *out_rxq_capacity)
+{
+	int rc;
+	EFRM_ASSERT(client != NULL);
+	rc = efrm_vi_resource_alloc_or_free(client, true, evq_virs, vi_flags,
+					    evq_capacity, txq_capacity,
+					    rxq_capacity, tx_q_tag, rx_q_tag,
+					    virs_out);
+	if (rc == 0) {
+		if (out_io_mmap_bytes != NULL)
+			*out_io_mmap_bytes = (*virs_out)->bar_mmap_bytes;
+		if (out_mem_mmap_bytes != NULL)
+			*out_mem_mmap_bytes = (*virs_out)->mem_mmap_bytes;
+		if (out_txq_capacity != NULL)
+			*out_txq_capacity =
+			    (*virs_out)->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX];
+		if (out_rxq_capacity != NULL)
+			*out_rxq_capacity =
+			    (*virs_out)->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX];
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(efrm_vi_resource_alloc);
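[Editorial note: because of the evq_refs counting described above, several VIs can share one event queue. In the hedged sketch below (capacities illustrative, error handling trimmed), the first call creates a VI with a private EVQ and the second creates a VI whose completions are delivered into that EVQ:]

	struct vi_resource *evq_vi, *vi;
	uint32_t io_bytes, mem_bytes, txq_cap, rxq_cap;
	int rc;

	rc = efrm_vi_resource_alloc(client, NULL /* internal EVQ */, 0,
				    1024 /* evq */, 512 /* txq */,
				    512 /* rxq */, 0, 0, &evq_vi,
				    &io_bytes, &mem_bytes,
				    &txq_cap, &rxq_cap);
	if (rc == 0)
		rc = efrm_vi_resource_alloc(client, evq_vi /* shared EVQ */,
					    0, 0 /* no private evq */,
					    512, 512, 0, 0, &vi,
					    NULL, NULL, NULL, NULL);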
+
+void efrm_vi_rm_free_flushed_resource(struct vi_resource *virs)
+{
+	EFRM_ASSERT(virs != NULL);
+	EFRM_ASSERT(virs->rs.rs_ref_count == 0);
+
+	EFRM_TRACE("%s: " EFRM_RESOURCE_FMT, __func__,
+		   EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle));
+	/* release the associated event queue then drop our own reference
+	 * count */
+	efrm_vi_rm_detach_evq(virs);
+	efrm_vi_rm_drop_ref(virs);
+}
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/vi_resource_flush.c
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/vi_resource_flush.c
@@ -0,0 +1,483 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains DMA queue flushing of VI resources.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "efrm_internal.h"
+
+
+/* can fail as workitem can already be scheduled -- ignore failure */
+#define EFRM_VI_RM_DELAYED_FREE(manager) \
+	queue_work(manager->workqueue, &manager->work_item)
+
+static const int flush_fifo_hwm = 8 /* TODO should be a HW specific const */ ;
+
+static void
+efrm_vi_resource_rx_flush_done(struct vi_resource *virs, bool *completed)
+{
+	/* We should only get a flush event if there is a flush
+	 * outstanding. */
+	EFRM_ASSERT(virs->rx_flush_outstanding);
+
+	virs->rx_flush_outstanding = 0;
+	virs->rx_flushing = 0;
+
+	list_del(&virs->rx_flush_link);
+	efrm_vi_manager->rx_flush_outstanding_count--;
+
+	if (virs->tx_flushing == 0) {
+		list_add_tail(&virs->rx_flush_link,
+			      &efrm_vi_manager->close_pending);
+		*completed = 1;
+	}
+}
+
+static void
+efrm_vi_resource_tx_flush_done(struct vi_resource *virs, bool *completed)
+{
+	/* We should only get a flush event if there is a flush
+	 * outstanding. */
+	EFRM_ASSERT(virs->tx_flushing);
+
+	virs->tx_flushing = 0;
+
+	list_del(&virs->tx_flush_link);
+
+	if (virs->rx_flushing == 0) {
+		list_add_tail(&virs->rx_flush_link,
+			      &efrm_vi_manager->close_pending);
+		*completed = 1;
+	}
+}
+
+static void
+efrm_vi_resource_issue_rx_flush(struct vi_resource *virs, bool *completed)
+{
+	struct efhw_nic *nic = virs->rs.rs_client->nic;
+	int instance;
+	int rc;
+
+	instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+
+	list_add_tail(&virs->rx_flush_link,
+		      &efrm_vi_manager->rx_flush_outstanding_list);
+	virs->rx_flush_outstanding = virs->rx_flushing;
+	efrm_vi_manager->rx_flush_outstanding_count++;
+
+	EFRM_TRACE("%s: rx queue %d flush requested for nic %d",
+		   __func__, instance, nic->index);
+	rc = efhw_nic_flush_rx_dma_channel(nic, instance);
+	if (rc == -EAGAIN)
+		efrm_vi_resource_rx_flush_done(virs, completed);
+}
+
+static void
+efrm_vi_resource_issue_tx_flush(struct vi_resource *virs, bool *completed)
+{
+	struct efhw_nic *nic = virs->rs.rs_client->nic;
+	int instance;
+	int rc;
+
+	instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+
+	list_add_tail(&virs->tx_flush_link,
+		      &efrm_vi_manager->tx_flush_outstanding_list);
+
+	EFRM_TRACE("%s: tx queue %d flush requested for nic %d",
+		   __func__, instance, nic->index);
+	rc = efhw_nic_flush_tx_dma_channel(nic, instance);
+	if (rc == -EAGAIN)
+		efrm_vi_resource_tx_flush_done(virs, completed);
+}
+
+static void efrm_vi_resource_process_waiting_flushes(bool *completed)
+{
+	struct vi_resource *virs;
+
+	while (efrm_vi_manager->rx_flush_outstanding_count < flush_fifo_hwm &&
+	       !list_empty(&efrm_vi_manager->rx_flush_waiting_list)) {
+		virs =
+		    list_entry(list_pop
+			       (&efrm_vi_manager->rx_flush_waiting_list),
+			       struct vi_resource, rx_flush_link);
+		efrm_vi_resource_issue_rx_flush(virs, completed);
+	}
+}
+
+#if BUG7916_WORKAROUND || BUG5302_WORKAROUND
+static void
+efrm_vi_resource_flush_retry_vi(struct vi_resource *virs,
+				int64_t time_now, bool *completed)
+{
+	struct efhw_nic *nic;
+	int instance;
+
+	instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle);
+
+	virs->flush_count++;
+	virs->flush_time = time_now;
+	nic = virs->rs.rs_client->nic;
+
+#if BUG7916_WORKAROUND
+	if (virs->rx_flush_outstanding) {
+		EFRM_TRACE("%s: Retrying RX flush on instance %d",
+			   __func__, instance);
+
+		list_del(&virs->rx_flush_link);
+		efrm_vi_manager->rx_flush_outstanding_count--;
+		efrm_vi_resource_issue_rx_flush(virs, completed);
+		efrm_vi_resource_process_waiting_flushes(completed);
+	}
+#endif
+
+#if BUG5302_WORKAROUND
+	if (virs->tx_flushing) {
+		if (virs->flush_count > 5) {
+			EFRM_TRACE("%s: VI resource stuck flush pending "
+				   "(instance=%d, count=%d)",
+				   __func__, instance, virs->flush_count);
+			falcon_clobber_tx_dma_ptrs(nic, instance);
+		} else {
+			EFRM_TRACE("%s: Retrying TX flush on instance %d",
+				   __func__, instance);
+		}
+
+		list_del(&virs->tx_flush_link);
+		efrm_vi_resource_issue_tx_flush(virs, completed);
+	}
+#endif
+}
+#endif
+
+int efrm_vi_resource_flush_retry(struct vi_resource *virs)
+{
+#if BUG7916_WORKAROUND || BUG5302_WORKAROUND
+	irq_flags_t lock_flags;
+	bool completed = false;
+
+	if (virs->rx_flushing == 0 && virs->tx_flushing == 0)
+		return -EALREADY;
+
+	spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags);
+	efrm_vi_resource_flush_retry_vi(virs, get_jiffies_64(), &completed);
+	spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags);
+
+	if (completed)
+		EFRM_VI_RM_DELAYED_FREE(efrm_vi_manager);
+#endif
+
+	return 0;
+}
+EXPORT_SYMBOL(efrm_vi_resource_flush_retry);
+
+#if BUG7916_WORKAROUND || BUG5302_WORKAROUND
+/* resource manager lock should be taken before this call */
+static void efrm_vi_handle_flush_loss(bool *completed)
+{
+	struct list_head *pos, *temp;
+	struct vi_resource *virs;
+	int64_t time_now, time_pending;
+
+	/* It's possible we miss flushes - the list is sorted in the order we
+	 * generate flushes, so see if any are very old.  It's also possible
+	 * that we decide an endpoint is flushed even though we've not
+	 * received all the flush events.  We *should* mark it as completed,
+	 * reclaim it and loop again.  ??
+	 * THIS NEEDS BACKPORTING FROM THE FALCON branch
+	 */
+	time_now = get_jiffies_64();
+
+#if BUG7916_WORKAROUND
+	list_for_each_safe(pos, temp,
+			   &efrm_vi_manager->rx_flush_outstanding_list) {
+		virs = container_of(pos, struct vi_resource, rx_flush_link);
+
+		time_pending = time_now - virs->flush_time;
+
+		/* List entries are held in reverse chronological order.  Only
+		 * process the old ones. */
+		if (time_pending <= 0x100000000LL)
+			break;
+
+		efrm_vi_resource_flush_retry_vi(virs, time_now, completed);
+	}
+#endif
+
+#if BUG5302_WORKAROUND
+	list_for_each_safe(pos, temp,
+			   &efrm_vi_manager->tx_flush_outstanding_list) {
+		virs = container_of(pos, struct vi_resource, tx_flush_link);
+
+		time_pending = time_now - virs->flush_time;
+
+		/* List entries are held in reverse chronological order.
+		 * Only process the old ones. */
+		if (time_pending <= 0x100000000LL)
+			break;
+
+		efrm_vi_resource_flush_retry_vi(virs, time_now, completed);
+	}
+#endif
+}
+#endif
+
+void
+efrm_vi_register_flush_callback(struct vi_resource *virs,
+				void (*handler)(void *), void *arg)
+{
+	if (handler == NULL) {
+		virs->flush_callback_fn = handler;
+		wmb();
+		virs->flush_callback_arg = arg;
+	} else {
+		virs->flush_callback_arg = arg;
+		wmb();
+		virs->flush_callback_fn = handler;
+	}
+}
+EXPORT_SYMBOL(efrm_vi_register_flush_callback);
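[Editorial note: the wmb() ordering above guarantees a concurrent reader never observes a non-NULL flush_callback_fn paired with a stale flush_callback_arg -- registration publishes the argument before the function pointer, deregistration clears the pointer first. A hedged sketch of the notify-instead-of-free pattern that efrm_vi_rm_delayed_free() below supports; the completion-based handler is an invented example, not an in-tree caller:]

	static void example_flush_done(void *arg)
	{
		complete((struct completion *)arg);
	}

	/* With a callback registered, a flushed VI is re-initialised and
	 * reported rather than freed. */
	struct completion done;

	init_completion(&done);
	efrm_vi_register_flush_callback(virs, example_flush_done, &done);
	efrm_pt_flush(virs);
	wait_for_completion(&done);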
*/ + if (virs->tx_flushing) + efrm_vi_resource_issue_tx_flush(virs, &completed); + + virs->flush_time = get_jiffies_64(); + + spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); + + if (completed) + EFRM_VI_RM_DELAYED_FREE(efrm_vi_manager); + + return 0; +} +EXPORT_SYMBOL(efrm_pt_flush); + +static void +efrm_handle_rx_dmaq_flushed(struct efhw_nic *flush_nic, int instance, + bool *completed) +{ + struct list_head *pos, *temp; + struct vi_resource *virs; + + list_for_each_safe(pos, temp, + &efrm_vi_manager->rx_flush_outstanding_list) { + virs = container_of(pos, struct vi_resource, rx_flush_link); + + if (instance == EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)) { + efrm_vi_resource_rx_flush_done(virs, completed); + efrm_vi_resource_process_waiting_flushes(completed); + return; + } + } + EFRM_TRACE("%s: Unhandled rx flush event, nic %d, instance %d", + __func__, flush_nic->index, instance); +} + +static void +efrm_handle_tx_dmaq_flushed(struct efhw_nic *flush_nic, int instance, + bool *completed) +{ + struct list_head *pos, *temp; + struct vi_resource *virs; + + list_for_each_safe(pos, temp, + &efrm_vi_manager->tx_flush_outstanding_list) { + virs = container_of(pos, struct vi_resource, tx_flush_link); + + if (instance == EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)) { + efrm_vi_resource_tx_flush_done(virs, completed); + return; + } + } + EFRM_TRACE("%s: Unhandled tx flush event, nic %d, instance %d", + __func__, flush_nic->index, instance); +} + +void +efrm_handle_dmaq_flushed(struct efhw_nic *flush_nic, unsigned instance, + int rx_flush) +{ + irq_flags_t lock_flags; + bool completed = false; + + EFRM_TRACE("%s: nic_i=%d instance=%d rx_flush=%d", __func__, + flush_nic->index, instance, rx_flush); + + spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags); + + if (rx_flush) + efrm_handle_rx_dmaq_flushed(flush_nic, instance, &completed); + else + efrm_handle_tx_dmaq_flushed(flush_nic, instance, &completed); + +#if BUG7916_WORKAROUND || BUG5302_WORKAROUND + efrm_vi_handle_flush_loss(&completed); +#endif + + spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); + + if (completed) + EFRM_VI_RM_DELAYED_FREE(efrm_vi_manager); +} + +static void +efrm_vi_rm_reinit_dmaqs(struct vi_resource *virs) +{ + struct efhw_nic *nic = virs->rs.rs_client->nic; + + if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_TX] != 0) + efrm_vi_rm_init_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_TX, nic); + if (virs->dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_RX]) + efrm_vi_rm_init_dmaq(virs, EFRM_VI_RM_DMA_QUEUE_RX, nic); +} + +/* free any PT endpoints whose flush has now complete */ +void efrm_vi_rm_delayed_free(struct work_struct *data) +{ + irq_flags_t lock_flags; + struct list_head close_pending; + struct vi_resource *virs; + + EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_vi_manager->rm); + + spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags); + list_replace_init(&efrm_vi_manager->close_pending, &close_pending); + spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); + + EFRM_TRACE("%s: %p", __func__, efrm_vi_manager); + while (!list_empty(&close_pending)) { + virs = + list_entry(list_pop(&close_pending), struct vi_resource, + rx_flush_link); + EFRM_TRACE("%s: flushed VI instance=%d", __func__, + EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)); + + if (virs->flush_callback_fn != NULL) { + efrm_vi_rm_reinit_dmaqs(virs); + virs->flush_callback_fn(virs->flush_callback_arg); + } else + efrm_vi_rm_free_flushed_resource(virs); + } +} + +void efrm_vi_rm_salvage_flushed_vis(void) +{ +#if BUG7916_WORKAROUND || 
BUG5302_WORKAROUND + irq_flags_t lock_flags; + bool completed; + + spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags); + efrm_vi_handle_flush_loss(&completed); + spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); +#endif + + efrm_vi_rm_delayed_free(&efrm_vi_manager->work_item); +} + +void efrm_vi_resource_free(struct vi_resource *virs) +{ + efrm_vi_register_flush_callback(virs, NULL, NULL); + efrm_pt_flush(virs); +} +EXPORT_SYMBOL(efrm_vi_resource_free); + + +void efrm_vi_resource_release(struct vi_resource *virs) +{ + if (__efrm_resource_release(&virs->rs)) + efrm_vi_resource_free(virs); +} +EXPORT_SYMBOL(efrm_vi_resource_release); + +/* + * vi: sw=8:ai:aw + */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/falcon_hash.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/falcon_hash.c @@ -0,0 +1,159 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains EtherFabric NIC hash algorithms implementation. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + + +static unsigned int +common_get_ip_key(unsigned int src_ip, unsigned int src_port, + unsigned int dest_ip, unsigned int dest_port, + int tcp, int full, int tx, unsigned int masked_q_id) +{ + + unsigned int tmp_port, result; + + EFHW_ASSERT(tcp == 0 || tcp == 1); + EFHW_ASSERT(full == 0 || full == 1); + EFHW_ASSERT(masked_q_id < (1 << 10)); + + /* m=masked_q_id(TX)/0(RX) u=UDP S,D=src/dest addr s,d=src/dest port + * + * Wildcard filters have src(TX)/dest(RX) addr and port = 0; + * and UDP wildcard filters have the src and dest port fields swapped. + * + * Addr/port fields are little-endian. + * + * 3322222222221111111111 + * 10987654321098765432109876543210 + * + * 000000000000000000000mmmmmmmmmmu ^ + * DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD ^ + * ddddddddddddddddSSSSSSSSSSSSSSSS ^ + * SSSSSSSSSSSSSSSSssssssssssssssss + */ + + if (!tx) + masked_q_id = 0; + + if (!full) { + if (tx) { + dest_ip = 0; + dest_port = 0; + } else { + src_ip = 0; + src_port = 0; + } + if (!tcp) { + tmp_port = src_port; + src_port = dest_port; + dest_port = tmp_port; + } + } + + result = ((masked_q_id << 1) | (!tcp)) ^ + (dest_ip) ^ + (((dest_port & 0xffff) << 16) | ((src_ip >> 16) & 0xffff)) ^ + (((src_ip & 0xffff) << 16) | (src_port & 0xffff)); + + EFHW_TRACE("%s: IP %s %s %x", __func__, tcp ? "TCP" : "UDP", + full ? 
"Full" : "Wildcard", result); + + return result; +} + + +unsigned int +falcon_hash_get_ip_key(unsigned int src_ip, unsigned int src_port, + unsigned int dest_ip, unsigned int dest_port, + int tcp, int full) +{ + return common_get_ip_key(src_ip, src_port, dest_ip, dest_port, tcp, + full, 0, 0); +} + + +/* This function generates the First Hash key */ +unsigned int falcon_hash_function1(unsigned int key, unsigned int nfilters) +{ + + unsigned short int lfsr_reg; + unsigned int tmp_key; + int index; + + unsigned short int lfsr_input; + unsigned short int single_bit_key; + unsigned short int bit16_lfsr; + unsigned short int bit3_lfsr; + + lfsr_reg = 0xFFFF; + tmp_key = key; + + /* For Polynomial equation X^16+X^3+1 */ + for (index = 0; index < 32; index++) { + /* Get the bit from key and shift the key */ + single_bit_key = (tmp_key & 0x80000000) >> 31; + tmp_key = tmp_key << 1; + + /* get the Tap bits to XOR operation */ + bit16_lfsr = (lfsr_reg & 0x8000) >> 15; + bit3_lfsr = (lfsr_reg & 0x0004) >> 2; + + /* Get the Input value to the LFSR */ + lfsr_input = ((bit16_lfsr ^ bit3_lfsr) ^ single_bit_key); + + /* Shift and store out of the two TAPs */ + lfsr_reg = lfsr_reg << 1; + lfsr_reg = lfsr_reg | (lfsr_input & 0x0001); + + } + + lfsr_reg = lfsr_reg & (nfilters - 1); + + return lfsr_reg; +} + +/* This function generates the Second Hash */ +unsigned int +falcon_hash_function2(unsigned int key, unsigned int nfilters) +{ + return (unsigned int)(((unsigned long long)key * 2 - 1) & + (nfilters - 1)); +} + +/* This function iterates through the hash table */ +unsigned int +falcon_hash_iterator(unsigned int hash1, unsigned int hash2, + unsigned int n_search, unsigned int nfilters) +{ + return (hash1 + (n_search * hash2)) & (nfilters - 1); +} + --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/kfifo.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/kfifo.c @@ -0,0 +1,208 @@ +/* + * A simple kernel FIFO implementation. + * + * Copyright (C) 2004 Stelian Pop + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* + * This file is stolen from the Linux kernel sources + * (linux-2.6.22/kernel/kfifo.c) into sfc_resource driver. + * It should be used for old kernels without kfifo implementation. + * Most part of linux/kfifo.h is incorporated into + * ci/efrm/sysdep_linux.h. + */ +#include +#ifdef HAS_NO_KFIFO + +#include +#include +#include +#include +/*#include */ + +/** + * kfifo_init - allocates a new FIFO using a preallocated buffer + * @buffer: the preallocated buffer to be used. + * @size: the size of the internal buffer, this have to be a power of 2. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * Do NOT pass the kfifo to kfifo_free() after use! Simply free the + * &struct kfifo with kfree(). 
+ */ +struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock) +{ + struct kfifo *fifo; + + /* size must be a power of 2 */ + BUG_ON(size & (size - 1)); + + fifo = kmalloc(sizeof(struct kfifo), gfp_mask); + if (!fifo) + return ERR_PTR(-ENOMEM); + + fifo->buffer = buffer; + fifo->size = size; + fifo->in = fifo->out = 0; + fifo->lock = lock; + + return fifo; +} +EXPORT_SYMBOL(kfifo_init); + +/** + * kfifo_alloc - allocates a new FIFO and its internal buffer + * @size: the size of the internal buffer to be allocated. + * @gfp_mask: get_free_pages mask, passed to kmalloc() + * @lock: the lock to be used to protect the fifo buffer + * + * The size will be rounded-up to a power of 2. + */ +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) +{ + unsigned char *buffer; + struct kfifo *ret; + + /* + * round up to the next power of 2, since our 'let the indices + * wrap' tachnique works only in this case. + */ + if (size & (size - 1)) { + BUG_ON(size > 0x80000000); + size = roundup_pow_of_two(size); + } + + buffer = kmalloc(size, gfp_mask); + if (!buffer) + return ERR_PTR(-ENOMEM); + + ret = kfifo_init(buffer, size, gfp_mask, lock); + + if (IS_ERR(ret)) + kfree(buffer); + + return ret; +} +EXPORT_SYMBOL(kfifo_alloc); + +/** + * kfifo_free - frees the FIFO + * @fifo: the fifo to be freed. + */ +void kfifo_free(struct kfifo *fifo) +{ + kfree(fifo->buffer); + kfree(fifo); +} +EXPORT_SYMBOL(kfifo_free); + +/** + * __kfifo_put - puts some data into the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most @len bytes from the @buffer into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int +__kfifo_put(struct kfifo *fifo, unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->size - fifo->in + fifo->out); + + /* + * Ensure that we sample the fifo->out index -before- we + * start putting bytes into the kfifo. + */ + + smp_mb(); + + /* first put the data starting from fifo->in to buffer end */ + l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); + + /* then put the rest (if any) at the beginning of the buffer */ + memcpy(fifo->buffer, buffer + l, len - l); + + /* + * Ensure that we add the bytes to the kfifo -before- + * we update the fifo->in index. + */ + + smp_wmb(); + + fifo->in += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_put); + +/** + * __kfifo_get - gets some data from the FIFO, no locking version + * @fifo: the fifo to be used. + * @buffer: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most @len bytes from the FIFO into the + * @buffer and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int +__kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len) +{ + unsigned int l; + + len = min(len, fifo->in - fifo->out); + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. 
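The "let the indices wrap" trick above relies on free-running unsigned in/out indices that are masked only at access time, which is why the size must be a power of two. A minimal single-threaded userspace sketch of the same arithmetic (the kernel version adds the smp_* barriers for the one-producer/one-consumer case; names here are hypothetical):

#include <stdio.h>
#include <string.h>

#define FIFO_SIZE 8                      /* must be a power of two */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

struct mini_fifo {
        unsigned char buf[FIFO_SIZE];
        unsigned in, out;                /* free-running; fill level = in - out */
};

static unsigned mf_put(struct mini_fifo *f, const unsigned char *p, unsigned len)
{
        unsigned l;

        len = MIN(len, FIFO_SIZE - f->in + f->out);
        /* copy up to the end of the buffer, then wrap to the start */
        l = MIN(len, FIFO_SIZE - (f->in & (FIFO_SIZE - 1)));
        memcpy(f->buf + (f->in & (FIFO_SIZE - 1)), p, l);
        memcpy(f->buf, p + l, len - l);
        f->in += len;                    /* wraps modulo 2^32, harmlessly */
        return len;
}

static unsigned mf_get(struct mini_fifo *f, unsigned char *p, unsigned len)
{
        unsigned l;

        len = MIN(len, f->in - f->out);
        l = MIN(len, FIFO_SIZE - (f->out & (FIFO_SIZE - 1)));
        memcpy(p, f->buf + (f->out & (FIFO_SIZE - 1)), l);
        memcpy(p + l, f->buf, len - l);
        f->out += len;
        return len;
}

int main(void)
{
        struct mini_fifo f = { .in = 0, .out = 0 };
        unsigned char tmp[5] = { 0 };

        mf_put(&f, (const unsigned char *)"abcdef", 6);
        printf("got %u bytes: %s\n", mf_get(&f, tmp, 4), tmp);  /* abcd */
        return 0;
}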
+ */ + + smp_rmb(); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); + memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + + /* then get the rest (if any) from the beginning of the buffer */ + memcpy(buffer + l, fifo->buffer, len - l); + + /* + * Ensure that we remove the bytes from the kfifo -before- + * we update the fifo->out index. + */ + + smp_mb(); + + fifo->out += len; + + return len; +} +EXPORT_SYMBOL(__kfifo_get); + +#endif --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/kernel_proc.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/kernel_proc.c @@ -0,0 +1,109 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains /proc/driver/sfc_resource/ implementation. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include + +/** Top level directory for sfc specific stats **/ +static struct proc_dir_entry *efrm_proc_root; /* = NULL */ + +static int +efrm_resource_read_proc(char *buf, char **start, off_t offset, int count, + int *eof, void *data); + +int efrm_install_proc_entries(void) +{ + /* create the top-level directory for etherfabric specific stuff */ + efrm_proc_root = proc_mkdir("driver/sfc_resource", NULL); + if (!efrm_proc_root) + return -ENOMEM; + + if (create_proc_read_entry("resources", 0, efrm_proc_root, + efrm_resource_read_proc, 0) == NULL) { + EFRM_WARN("%s: Unable to create /proc/drivers/sfc_resource/" + "resources", __func__); + } + return 0; +} + +void efrm_uninstall_proc_entries(void) +{ + EFRM_ASSERT(efrm_proc_root); + remove_proc_entry("resources", efrm_proc_root); + remove_proc_entry(efrm_proc_root->name, efrm_proc_root->parent); + efrm_proc_root = NULL; +} + +/**************************************************************************** + * + * /proc/drivers/sfc/resources + * + ****************************************************************************/ + +#define EFRM_PROC_PRINTF(buf, len, fmt, ...) 
\ + do { \ + if (count - len > 0) \ + len += snprintf(buf+len, count-len, (fmt), \ + __VA_ARGS__); \ + } while (0) + +static int +efrm_resource_read_proc(char *buf, char **start, off_t offset, int count, + int *eof, void *data) +{ + irq_flags_t lock_flags; + int len = 0; + int type; + struct efrm_resource_manager *rm; + + for (type = 0; type < EFRM_RESOURCE_NUM; type++) { + rm = efrm_rm_table[type]; + if (rm == NULL) + continue; + + EFRM_PROC_PRINTF(buf, len, "*** %s ***\n", rm->rm_name); + + spin_lock_irqsave(&rm->rm_lock, lock_flags); + EFRM_PROC_PRINTF(buf, len, "current = %u\n", rm->rm_resources); + EFRM_PROC_PRINTF(buf, len, " max = %u\n\n", + rm->rm_resources_hiwat); + spin_unlock_irqrestore(&rm->rm_lock, lock_flags); + } + + return count ? strlen(buf) : 0; +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/resource_manager.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/resource_manager.c @@ -0,0 +1,145 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains generic code for resources and resource managers. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include "efrm_internal.h" + +/********************************************************************** + * struct efrm_resource_manager + */ + +void efrm_resource_manager_dtor(struct efrm_resource_manager *rm) +{ + EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm); + + /* call destructor */ + EFRM_DO_DEBUG(if (rm->rm_resources) + EFRM_ERR("%s: %s leaked %d resources", + __func__, rm->rm_name, rm->rm_resources)); + EFRM_ASSERT(rm->rm_resources == 0); + EFRM_ASSERT(list_empty(&rm->rm_resources_list)); + + rm->rm_dtor(rm); + + /* clear out things built by efrm_resource_manager_ctor */ + spin_lock_destroy(&rm->rm_lock); + + /* and the free the memory */ + EFRM_DO_DEBUG(memset(rm, 0, sizeof(*rm))); + kfree(rm); +} + +/* Construct a resource manager. Resource managers are singletons. 
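A standalone sketch of the bounded-append pattern used by the EFRM_PROC_PRINTF() macro above (hypothetical names). As in the macro, a truncating snprintf() can push len past the capacity, which simply disables further appends while the string itself stays bounded:

#include <stdio.h>

#define BUF_PRINTF(buf, len, cap, ...)                          \
        do {                                                    \
                if ((cap) - (len) > 0)                          \
                        (len) += snprintf((buf) + (len),        \
                                          (cap) - (len),        \
                                          __VA_ARGS__);         \
        } while (0)

int main(void)
{
        char buf[64];
        int len = 0;

        BUF_PRINTF(buf, len, (int)sizeof(buf), "*** %s ***\n", "IOBUFSET");
        BUF_PRINTF(buf, len, (int)sizeof(buf), "current = %u\n", 3u);
        fputs(buf, stdout);
        return 0;
}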
*/ +int +efrm_resource_manager_ctor(struct efrm_resource_manager *rm, + void (*dtor)(struct efrm_resource_manager *), + const char *name, unsigned type) +{ + EFRM_ASSERT(rm); + EFRM_ASSERT(dtor); + + rm->rm_name = name; + EFRM_DO_DEBUG(rm->rm_type = type); + rm->rm_dtor = dtor; + spin_lock_init(&rm->rm_lock); + rm->rm_resources = 0; + rm->rm_resources_hiwat = 0; + INIT_LIST_HEAD(&rm->rm_resources_list); + EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm); + return 0; +} + + +void efrm_client_add_resource(struct efrm_client *client, + struct efrm_resource *rs) +{ + struct efrm_resource_manager *rm; + irq_flags_t lock_flags; + + EFRM_ASSERT(client != NULL); + EFRM_ASSERT(rs != NULL); + + spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags); + rm = efrm_rm_table[EFRM_RESOURCE_TYPE(rs->rs_handle)]; + ++rm->rm_resources; + list_add(&rs->rs_manager_link, &rm->rm_resources_list); + if (rm->rm_resources > rm->rm_resources_hiwat) + rm->rm_resources_hiwat = rm->rm_resources; + rs->rs_client = client; + ++client->ref_count; + list_add(&rs->rs_client_link, &client->resources); + spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags); +} + + +void efrm_resource_ref(struct efrm_resource *rs) +{ + irq_flags_t lock_flags; + spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags); + ++rs->rs_ref_count; + spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags); +} +EXPORT_SYMBOL(efrm_resource_ref); + + +int __efrm_resource_release(struct efrm_resource *rs) +{ + struct efrm_resource_manager *rm; + irq_flags_t lock_flags; + int free_rs; + + spin_lock_irqsave(&efrm_nic_tablep->lock, lock_flags); + free_rs = --rs->rs_ref_count == 0; + if (free_rs) { + rm = efrm_rm_table[EFRM_RESOURCE_TYPE(rs->rs_handle)]; + EFRM_ASSERT(rm->rm_resources > 0); + --rm->rm_resources; + list_del(&rs->rs_manager_link); + list_del(&rs->rs_client_link); + } + spin_unlock_irqrestore(&efrm_nic_tablep->lock, lock_flags); + return free_rs; +} +EXPORT_SYMBOL(__efrm_resource_release); + +/* + * vi: sw=8:ai:aw + */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/vi_resource_event.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/vi_resource_event.c @@ -0,0 +1,250 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains event handling for VI resource. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include +#include "efrm_internal.h" + + +static inline int +efrm_eventq_bytes(struct vi_resource *virs) +{ + return efrm_vi_rm_evq_bytes(virs); +} + + +static inline efhw_event_t * +efrm_eventq_base(struct vi_resource *virs) +{ + struct eventq_resource_hardware *hw; + hw = &(virs->nic_info.evq_pages); + return (efhw_event_t *) (efhw_iopages_ptr(&(hw->iobuff)) + + hw->iobuff_off); +} + + +void +efrm_eventq_request_wakeup(struct vi_resource *virs, unsigned current_ptr) +{ + struct efhw_nic *nic = virs->rs.rs_client->nic; + int next_i; + next_i = ((current_ptr / sizeof(efhw_event_t)) & + (virs->evq_capacity - 1)); + + efhw_nic_wakeup_request(nic, efrm_eventq_dma_addr(virs), next_i, + EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle)); +} +EXPORT_SYMBOL(efrm_eventq_request_wakeup); + +void efrm_eventq_reset(struct vi_resource *virs) +{ + struct efhw_nic *nic = virs->rs.rs_client->nic; + int instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); + + EFRM_ASSERT(virs->evq_capacity != 0); + + /* FIXME: Protect against concurrent resets. */ + + efhw_nic_event_queue_disable(nic, instance, 0); + + memset(efrm_eventq_base(virs), EFHW_CLEAR_EVENT_VALUE, + efrm_eventq_bytes(virs)); + efhw_nic_event_queue_enable(nic, instance, virs->evq_capacity, + efrm_eventq_dma_addr(virs), + virs->nic_info.evq_pages. + buf_tbl_alloc.base, + instance < 64); + EFRM_TRACE("%s: " EFRM_RESOURCE_FMT, __func__, + EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle)); +} +EXPORT_SYMBOL(efrm_eventq_reset); + +int +efrm_eventq_register_callback(struct vi_resource *virs, + void (*handler) (void *, int, + struct efhw_nic *nic), + void *arg) +{ + struct efrm_nic_per_vi *cb_info; + int instance; + int bit; + + EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 0); + EFRM_ASSERT(virs->evq_capacity != 0); + EFRM_ASSERT(handler != NULL); + + /* ?? TODO: Get rid of this test when client is compulsory. */ + if (virs->rs.rs_client == NULL) { + EFRM_ERR("%s: no client", __func__); + return -EINVAL; + } + + virs->evq_callback_arg = arg; + virs->evq_callback_fn = handler; + instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); + cb_info = &efrm_nic(virs->rs.rs_client->nic)->vis[instance]; + + /* The handler can be set only once. */ + bit = test_and_set_bit(VI_RESOURCE_EVQ_STATE_CALLBACK_REGISTERED, + &cb_info->state); + if (bit) + return -EBUSY; + cb_info->vi = virs; + + return 0; +} +EXPORT_SYMBOL(efrm_eventq_register_callback); + +void efrm_eventq_kill_callback(struct vi_resource *virs) +{ + struct efrm_nic_per_vi *cb_info; + int32_t evq_state; + int instance; + int bit; + + EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 0); + EFRM_ASSERT(virs->evq_capacity != 0); + EFRM_ASSERT(virs->rs.rs_client != NULL); + + instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); + cb_info = &efrm_nic(virs->rs.rs_client->nic)->vis[instance]; + cb_info->vi = NULL; + + /* Disable the timer. */ + efhw_nic_event_queue_disable(virs->rs.rs_client->nic, + instance, /*timer_only */ 1); + + /* Disable the callback. */ + bit = test_and_clear_bit(VI_RESOURCE_EVQ_STATE_CALLBACK_REGISTERED, + &cb_info->state); + EFRM_ASSERT(bit); /* do not call me twice! */ + + /* Spin until the callback is complete. 
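A standalone C11 sketch of this tear-down protocol, with atomics standing in for the kernel's bit operations (names hypothetical): the dispatcher may only run while REGISTERED is set and marks itself BUSY; the killer clears REGISTERED, then waits for BUSY to drop, as the loop that follows does, before the callback argument may be freed:

#include <stdatomic.h>
#include <stdbool.h>

#define EVQ_REGISTERED (1u << 0)
#define EVQ_BUSY       (1u << 1)

/* Dispatch side: atomically move REGISTERED -> REGISTERED|BUSY, mirroring
 * the cmpxchg loop in efrm_eventq_do_callback() further below. */
static bool try_enter_callback(atomic_uint *state)
{
        unsigned old = atomic_load_explicit(state, memory_order_relaxed);

        do {
                if (!(old & EVQ_REGISTERED) || (old & EVQ_BUSY))
                        return false;
        } while (!atomic_compare_exchange_weak_explicit(state, &old,
                        old | EVQ_BUSY,
                        memory_order_acquire, memory_order_relaxed));
        return true;
}

/* Tear-down side: unregister, then wait for any in-flight callback. */
static void kill_callback(atomic_uint *state)
{
        atomic_fetch_and_explicit(state, ~EVQ_REGISTERED,
                                  memory_order_acq_rel);
        while (atomic_load_explicit(state, memory_order_acquire) & EVQ_BUSY)
                ;       /* the driver sleeps with udelay(1) in this loop */
}

int main(void)
{
        atomic_uint state = EVQ_REGISTERED;

        if (try_enter_callback(&state)) {
                /* ...the handler would run here... */
                atomic_fetch_and_explicit(&state, ~EVQ_BUSY,
                                          memory_order_release);
        }
        kill_callback(&state);
        return 0;
}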
*/ + do { + rmb(); + + udelay(1); + evq_state = cb_info->state; + } while ((evq_state & VI_RESOURCE_EVQ_STATE(BUSY))); + + virs->evq_callback_fn = NULL; +} +EXPORT_SYMBOL(efrm_eventq_kill_callback); + +static void +efrm_eventq_do_callback(struct efhw_nic *nic, unsigned instance, + bool is_timeout) +{ + struct efrm_nic *rnic = efrm_nic(nic); + void (*handler) (void *, int is_timeout, struct efhw_nic *nic); + void *arg; + struct efrm_nic_per_vi *cb_info; + int32_t evq_state; + int32_t new_evq_state; + struct vi_resource *virs; + int bit; + + EFRM_ASSERT(efrm_vi_manager); + + cb_info = &rnic->vis[instance]; + + /* Set the BUSY bit and clear WAKEUP_PENDING. Do this + * before waking up the sleeper to avoid races. */ + while (1) { + evq_state = cb_info->state; + new_evq_state = evq_state; + + if ((evq_state & VI_RESOURCE_EVQ_STATE(BUSY)) != 0) { + EFRM_ERR("%s:%d: evq_state[%d] corrupted!", + __func__, __LINE__, instance); + return; + } + + if (!is_timeout) + new_evq_state &= ~VI_RESOURCE_EVQ_STATE(WAKEUP_PENDING); + + if (evq_state & VI_RESOURCE_EVQ_STATE(CALLBACK_REGISTERED)) { + new_evq_state |= VI_RESOURCE_EVQ_STATE(BUSY); + virs = cb_info->vi; + if (cmpxchg(&cb_info->state, evq_state, + new_evq_state) == evq_state) + break; + } else { + /* Just update the state if necessary. */ + if (new_evq_state == evq_state || + cmpxchg(&cb_info->state, evq_state, + new_evq_state) == evq_state) + return; + } + } + + if (virs) { + handler = virs->evq_callback_fn; + arg = virs->evq_callback_arg; + EFRM_ASSERT(handler != NULL); + handler(arg, is_timeout, nic); + } + + /* Clear the BUSY bit. */ + bit = + test_and_clear_bit(VI_RESOURCE_EVQ_STATE_BUSY, + &cb_info->state); + if (!bit) { + EFRM_ERR("%s:%d: evq_state corrupted!", + __func__, __LINE__); + } +} + +void efrm_handle_wakeup_event(struct efhw_nic *nic, unsigned instance) +{ + efrm_eventq_do_callback(nic, instance, false); +} + +void efrm_handle_timeout_event(struct efhw_nic *nic, unsigned instance) +{ + efrm_eventq_do_callback(nic, instance, true); +} + +void efrm_handle_sram_event(struct efhw_nic *nic) +{ + if (nic->buf_commit_outstanding > 0) + nic->buf_commit_outstanding--; +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/iobufset_resource.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/iobufset_resource.c @@ -0,0 +1,404 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains non-contiguous I/O buffers support. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "efrm_internal.h" + + +#define EFRM_IOBUFSET_MAX_NUM_INSTANCES 0x00010000 + +struct iobufset_resource_manager { + struct efrm_resource_manager rm; + struct kfifo *free_ids; +}; + +struct iobufset_resource_manager *efrm_iobufset_manager; + +#define iobsrs(rs1) iobufset_resource(rs1) + +/* Returns size of iobufset resource data structure. */ +static inline size_t iobsrs_size(int n_pages) +{ + return offsetof(struct iobufset_resource, bufs) + + n_pages * sizeof(struct efhw_iopage); +} + +void efrm_iobufset_resource_free(struct iobufset_resource *rs) +{ + unsigned int i; + int id; + + EFRM_RESOURCE_ASSERT_VALID(&rs->rs, 1); + + if (!rs->linked && rs->buf_tbl_alloc.base != (unsigned) -1) + efrm_buffer_table_free(&rs->buf_tbl_alloc); + + /* see comment on call to efhw_iopage_alloc in the alloc routine above + for discussion on use of efrm_nic_tablep->a_nic here */ + EFRM_ASSERT(efrm_nic_tablep->a_nic); + if (rs->linked) { + /* Nothing to do. */ + } else if (rs->chunk_order == 0) { + for (i = 0; i < rs->n_bufs; ++i) + efhw_iopage_free(efrm_nic_tablep->a_nic, &rs->bufs[i]); + } else { + /* it is important that this is executed in increasing page + * order because some implementations of + * efhw_iopages_init_from_iopage() assume this */ + for (i = 0; i < rs->n_bufs; + i += rs->pages_per_contiguous_chunk) { + struct efhw_iopages iopages; + efhw_iopages_init_from_iopage(&iopages, &rs->bufs[i], + rs->chunk_order); + efhw_iopages_free(efrm_nic_tablep->a_nic, &iopages); + } + } + + /* free the instance number */ + id = EFRM_RESOURCE_INSTANCE(rs->rs.rs_handle); + EFRM_VERIFY_EQ(kfifo_put(efrm_iobufset_manager->free_ids, + (unsigned char *)&id, sizeof(id)), sizeof(id)); + + efrm_vi_resource_release(rs->evq); + if (rs->linked) + efrm_iobufset_resource_release(rs->linked); + + efrm_client_put(rs->rs.rs_client); + if (iobsrs_size(rs->n_bufs) < PAGE_SIZE) { + EFRM_DO_DEBUG(memset(rs, 0, sizeof(*rs))); + kfree(rs); + } else { + EFRM_DO_DEBUG(memset(rs, 0, sizeof(*rs))); + vfree(rs); + } +} +EXPORT_SYMBOL(efrm_iobufset_resource_free); + + +void efrm_iobufset_resource_release(struct iobufset_resource *iobrs) +{ + if (__efrm_resource_release(&iobrs->rs)) + efrm_iobufset_resource_free(iobrs); +} +EXPORT_SYMBOL(efrm_iobufset_resource_release); + + + +int +efrm_iobufset_resource_alloc(int32_t n_pages, + int32_t pages_per_contiguous_chunk, + struct vi_resource *vi_evq, + struct iobufset_resource *linked, + bool phys_addr_mode, + struct iobufset_resource **iobrs_out) +{ + struct iobufset_resource *iobrs; + int rc, instance, object_size; + unsigned int i; + + EFRM_ASSERT(iobrs_out); + EFRM_ASSERT(efrm_iobufset_manager); + EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_iobufset_manager->rm); + EFRM_RESOURCE_ASSERT_VALID(&vi_evq->rs, 0); + EFRM_ASSERT(EFRM_RESOURCE_TYPE(vi_evq->rs.rs_handle) == + EFRM_RESOURCE_VI); + EFRM_ASSERT(efrm_nic_tablep->a_nic); + + if (linked) { + /* This resource will share properties and memory with + * another. Only difference is that we'll program it into + * the buffer table of another nic. 
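The iobsrs_size() calculation above is the usual header-plus-trailing-array sizing via offsetof(); the driver then picks kmalloc() when the result is under PAGE_SIZE and vmalloc() otherwise, and frees with the matching call. A minimal standalone sketch of the sizing idiom, with hypothetical types:

#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>

struct page_desc { void *cpu_addr; unsigned long dma_addr; };

struct bufset {
        int n_bufs;
        struct page_desc bufs[];        /* flexible array member */
};

/* Header plus n trailing page descriptors, as iobsrs_size() computes. */
static size_t bufset_size(int n_pages)
{
        return offsetof(struct bufset, bufs) +
               n_pages * sizeof(struct page_desc);
}

int main(void)
{
        int n = 4;
        struct bufset *bs = malloc(bufset_size(n));

        if (!bs)
                return 1;
        bs->n_bufs = n;
        printf("object size for %d pages: %zu bytes\n", n, bufset_size(n));
        free(bs);
        return 0;
}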
+ */ + n_pages = linked->n_bufs; + pages_per_contiguous_chunk = linked->pages_per_contiguous_chunk; + phys_addr_mode = linked->buf_tbl_alloc.base == (unsigned) -1; + } + + /* allocate the resource data structure. */ + object_size = iobsrs_size(n_pages); + if (object_size < PAGE_SIZE) { + /* this should be OK from a tasklet */ + /* Necessary to do atomic alloc() as this + can be called from a weird-ass iSCSI context that is + !in_interrupt but is in_atomic - See BUG3163 */ + iobrs = kmalloc(object_size, GFP_ATOMIC); + } else { /* can't do this within a tasklet */ +#ifndef NDEBUG + if (in_interrupt() || in_atomic()) { + EFRM_ERR("%s(): alloc->u.iobufset.in_n_pages=%d", + __func__, n_pages); + EFRM_ASSERT(!in_interrupt()); + EFRM_ASSERT(!in_atomic()); + } +#endif + iobrs = (struct iobufset_resource *) vmalloc(object_size); + } + if (iobrs == NULL) { + EFRM_WARN("%s: failed to allocate container", __func__); + rc = -ENOMEM; + goto fail1; + } + + /* Allocate an instance number. */ + rc = kfifo_get(efrm_iobufset_manager->free_ids, + (unsigned char *)&instance, sizeof(instance)); + if (rc != sizeof(instance)) { + EFRM_WARN("%s: out of instances", __func__); + EFRM_ASSERT(rc == 0); + rc = -EBUSY; + goto fail3; + } + + efrm_resource_init(&iobrs->rs, EFRM_RESOURCE_IOBUFSET, instance); + + iobrs->evq = vi_evq; + iobrs->linked = linked; + iobrs->n_bufs = n_pages; + iobrs->pages_per_contiguous_chunk = pages_per_contiguous_chunk; + iobrs->chunk_order = fls(iobrs->pages_per_contiguous_chunk - 1); + iobrs->buf_tbl_alloc.base = (unsigned) -1; + + EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " %u pages", __func__, + EFRM_RESOURCE_PRI_ARG(iobrs->rs.rs_handle), iobrs->n_bufs); + + /* Allocate the iobuffers. */ + if (linked) { + memcpy(iobrs->bufs, linked->bufs, + iobrs->n_bufs * sizeof(iobrs->bufs[0])); + } else if (iobrs->chunk_order == 0) { + memset(iobrs->bufs, 0, iobrs->n_bufs * sizeof(iobrs->bufs[0])); + for (i = 0; i < iobrs->n_bufs; ++i) { + /* due to bug2426 we have to specifiy a NIC when + * allocating a DMAable page, which is a bit messy. + * For now we assume that if the page is suitable + * (e.g. DMAable) by one nic (efrm_nic_tablep->a_nic), + * it is suitable for all NICs. + * XXX I bet that breaks in Solaris. + */ + rc = efhw_iopage_alloc(efrm_nic_tablep->a_nic, + &iobrs->bufs[i]); + if (rc < 0) { + EFRM_WARN("%s: failed (rc %d) to allocate " + "page (i=%u)", __func__, rc, i); + goto fail4; + } + } + } else { + struct efhw_iopages iopages; + unsigned j; + + memset(iobrs->bufs, 0, iobrs->n_bufs * sizeof(iobrs->bufs[0])); + for (i = 0; i < iobrs->n_bufs; + i += iobrs->pages_per_contiguous_chunk) { + rc = efhw_iopages_alloc(efrm_nic_tablep->a_nic, + &iopages, iobrs->chunk_order); + if (rc < 0) { + EFRM_WARN("%s: failed (rc %d) to allocate " + "pages (i=%u order %d)", + __func__, rc, i, + iobrs->chunk_order); + goto fail4; + } + for (j = 0; j < iobrs->pages_per_contiguous_chunk; + j++) { + /* some implementation of + * efhw_iopage_init_from_iopages() rely on + * this function being called for + * _all_ pages in the chunk */ + efhw_iopage_init_from_iopages( + &iobrs->bufs[i + j], + &iopages, j); + } + } + } + + if (!phys_addr_mode) { + unsigned owner_id = EFAB_VI_RESOURCE_INSTANCE(iobrs->evq); + + if (!linked) { + /* Allocate space in the NIC's buffer table. 
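The order computation used here, fls(n - 1), yields the smallest order such that (1 << order) >= n, which is the rounding applied to both chunk sizes and the buffer-table allocation that follows. A portable standalone sketch (fls_portable() is a stand-in for the kernel's fls()):

#include <stdio.h>

/* Index of the highest set bit, 1-based; fls_portable(0) == 0. */
static int fls_portable(unsigned x)
{
        int r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

int main(void)
{
        for (unsigned n = 1; n <= 9; n++)
                printf("n=%u -> order %d (chunk %u)\n",
                       n, fls_portable(n - 1), 1u << fls_portable(n - 1));
        return 0;
}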
*/ + rc = efrm_buffer_table_alloc(fls(iobrs->n_bufs - 1), + &iobrs->buf_tbl_alloc); + if (rc < 0) { + EFRM_WARN("%s: failed (%d) to alloc %d buffer " + "table entries", __func__, rc, + iobrs->n_bufs); + goto fail5; + } + EFRM_ASSERT(((unsigned)1 << iobrs->buf_tbl_alloc.order) + >= (unsigned) iobrs->n_bufs); + } else { + iobrs->buf_tbl_alloc = linked->buf_tbl_alloc; + } + + /* Initialise the buffer table entries. */ + for (i = 0; i < iobrs->n_bufs; ++i) { + /*\ ?? \TODO burst them! */ + efrm_buffer_table_set(&iobrs->buf_tbl_alloc, + vi_evq->rs.rs_client->nic, + i, + efhw_iopage_dma_addr(&iobrs-> + bufs[i]), + owner_id); + } + efrm_buffer_table_commit(); + } + + EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " %d pages @ " + EFHW_BUFFER_ADDR_FMT, __func__, + EFRM_RESOURCE_PRI_ARG(iobrs->rs.rs_handle), + iobrs->n_bufs, EFHW_BUFFER_ADDR(iobrs->buf_tbl_alloc.base, + 0)); + efrm_resource_ref(&iobrs->evq->rs); + if (linked != NULL) + efrm_resource_ref(&linked->rs); + efrm_client_add_resource(vi_evq->rs.rs_client, &iobrs->rs); + *iobrs_out = iobrs; + return 0; + +fail5: + i = iobrs->n_bufs; +fail4: + /* see comment on call to efhw_iopage_alloc above for a discussion + * on use of efrm_nic_tablep->a_nic here */ + if (linked) { + /* Nothing to do. */ + } else if (iobrs->chunk_order == 0) { + while (i--) { + struct efhw_iopage *page = &iobrs->bufs[i]; + efhw_iopage_free(efrm_nic_tablep->a_nic, page); + } + } else { + unsigned int j; + for (j = 0; j < i; j += iobrs->pages_per_contiguous_chunk) { + struct efhw_iopages iopages; + + EFRM_ASSERT(j % iobrs->pages_per_contiguous_chunk + == 0); + /* it is important that this is executed in increasing + * page order because some implementations of + * efhw_iopages_init_from_iopage() assume this */ + efhw_iopages_init_from_iopage(&iopages, + &iobrs->bufs[j], + iobrs->chunk_order); + efhw_iopages_free(efrm_nic_tablep->a_nic, &iopages); + } + } +fail3: + if (object_size < PAGE_SIZE) + kfree(iobrs); + else + vfree(iobrs); +fail1: + return rc; +} +EXPORT_SYMBOL(efrm_iobufset_resource_alloc); + +static void iobufset_rm_dtor(struct efrm_resource_manager *rm) +{ + EFRM_ASSERT(&efrm_iobufset_manager->rm == rm); + kfifo_vfree(efrm_iobufset_manager->free_ids); +} + +int +efrm_create_iobufset_resource_manager(struct efrm_resource_manager **rm_out) +{ + int rc, max; + + EFRM_ASSERT(rm_out); + + efrm_iobufset_manager = + kmalloc(sizeof(*efrm_iobufset_manager), GFP_KERNEL); + if (efrm_iobufset_manager == NULL) + return -ENOMEM; + memset(efrm_iobufset_manager, 0, sizeof(*efrm_iobufset_manager)); + + /* + * Bug 1145, 1370: We need to set the initial size of both the + * resource table and the instance id table so that they never need + * to grow, as we want to be able to allocate new iobufsets at + * tasklet time. Let's make a pessimistic guess at the maximum + * number of iobufsets possible. It could be less because + * - jumbo frames have the same number of packets per iobufset BUT + * more pages per buffer + * - buffer table entries are used independently of iobufsets by + * sendfile + * + * Based on the TCP/IP stack setting of PKTS_PER_SET_S=5 ... + * - we can't use this define here as it breaks the layering. + */ +#define MIN_PAGES_PER_IOBUFSET (1 << 4) + + max = efrm_buffer_table_size() / MIN_PAGES_PER_IOBUFSET; + max = min_t(int, max, EFRM_IOBUFSET_MAX_NUM_INSTANCES); + + /* HACK: There currently exists an option to allocate buffers that + * are not programmed into the buffer table, so the max number is + * not limited by the buffer table size. I'm hoping this usage + * will go away eventually.
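Instance numbers are drawn from a FIFO pre-filled with every free ID (efrm_kfifo_id_ctor() below), so allocation never has to grow a table in atomic context. A minimal standalone sketch of such a fixed-capacity ID pool (hypothetical names, no locking):

#include <stdio.h>

#define MAX_IDS 8       /* capacity fixed up front, as for the iobufset pool */

struct id_pool {
        int ids[MAX_IDS];
        int head, tail, count;
};

/* Pre-fill with every ID in [base, limit) so allocation never needs to
 * grow at awkward (atomic/tasklet) times. */
static void pool_init(struct id_pool *p, int base, int limit)
{
        p->head = p->tail = p->count = 0;
        for (int id = base; id < limit; id++) {
                p->ids[p->tail++ % MAX_IDS] = id;
                p->count++;
        }
}

static int pool_get(struct id_pool *p)         /* -1 when exhausted */
{
        if (!p->count)
                return -1;
        p->count--;
        return p->ids[p->head++ % MAX_IDS];
}

static void pool_put(struct id_pool *p, int id)
{
        p->ids[p->tail++ % MAX_IDS] = id;
        p->count++;
}

int main(void)
{
        struct id_pool p;
        int a, b;

        pool_init(&p, 0, MAX_IDS);
        a = pool_get(&p);
        b = pool_get(&p);
        printf("got ids %d and %d\n", a, b);
        pool_put(&p, a);        /* freed IDs go back on the FIFO */
        return 0;
}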
+ */ + max = 32768; + + rc = efrm_kfifo_id_ctor(&efrm_iobufset_manager->free_ids, + 0, max, &efrm_iobufset_manager->rm.rm_lock); + if (rc != 0) + goto fail1; + + rc = efrm_resource_manager_ctor(&efrm_iobufset_manager->rm, + iobufset_rm_dtor, "IOBUFSET", + EFRM_RESOURCE_IOBUFSET); + if (rc < 0) + goto fail2; + + *rm_out = &efrm_iobufset_manager->rm; + return 0; + +fail2: + kfifo_vfree(efrm_iobufset_manager->free_ids); +fail1: + EFRM_DO_DEBUG(memset(efrm_iobufset_manager, 0, + sizeof(*efrm_iobufset_manager))); + kfree(efrm_iobufset_manager); + return rc; +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/buddy.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/buddy.c @@ -0,0 +1,220 @@ + +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains implementation of a buddy allocator. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include /* get uintXX types on win32 */ +#include +#include +#include + +#if 1 +#define DEBUG_ALLOC(x) +#else +#define DEBUG_ALLOC(x) x + +static inline void efrm_buddy_dump(struct efrm_buddy_allocator *b) +{ + unsigned o; + + EFRM_NOTICE("%s: dump allocator with order %u", + __func__, b->order); + for (o = 0; o <= b->order; o++) { + struct list_head *l = &b->free_lists[o]; + while (l->next != &b->free_lists[o]) { + l = l->next; + EFRM_NOTICE("%s: order %x: %zx", __func__, o, + l - b->links); + } + } +} +#endif + +/* + * The purpose of the following inline functions is to give the + * understandable names to the simple actions. 
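The allocator below finds a block's buddy by flipping the address bit at the block's order, addr ^ (1 << order), which is what lets efrm_buddy_free() further down merge freed blocks back together. A tiny standalone demonstration of that arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned addr = 8, order = 2;           /* block covering [8, 12) */
        unsigned buddy = addr ^ (1u << order);  /* -> 12, covering [12, 16) */

        printf("buddy of %u (order %u) is %u\n", addr, order, buddy);
        printf("merged block starts at %u\n", addr < buddy ? addr : buddy);
        return 0;
}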
+ */ +static inline void +efrm_buddy_free_list_add(struct efrm_buddy_allocator *b, + unsigned order, unsigned addr) +{ + list_add(&b->links[addr], &b->free_lists[order]); + b->orders[addr] = (uint8_t) order; +} +static inline void +efrm_buddy_free_list_del(struct efrm_buddy_allocator *b, unsigned addr) +{ + list_del(&b->links[addr]); + b->links[addr].next = NULL; +} +static inline int +efrm_buddy_free_list_empty(struct efrm_buddy_allocator *b, unsigned order) +{ + return list_empty(&b->free_lists[order]); +} +static inline unsigned +efrm_buddy_free_list_pop(struct efrm_buddy_allocator *b, unsigned order) +{ + struct list_head *l = list_pop(&b->free_lists[order]); + l->next = NULL; + return (unsigned)(l - b->links); +} +static inline int +efrm_buddy_addr_in_free_list(struct efrm_buddy_allocator *b, unsigned addr) +{ + return b->links[addr].next != NULL; +} +static inline unsigned +efrm_buddy_free_list_first(struct efrm_buddy_allocator *b, unsigned order) +{ + return (unsigned)(b->free_lists[order].next - b->links); +} + +int efrm_buddy_ctor(struct efrm_buddy_allocator *b, unsigned order) +{ + unsigned o; + unsigned size = 1 << order; + + DEBUG_ALLOC(EFRM_NOTICE("%s(%u)", __func__, order)); + EFRM_ASSERT(b); + EFRM_ASSERT(order <= sizeof(unsigned) * 8 - 1); + + b->order = order; + b->free_lists = vmalloc((order + 1) * sizeof(struct list_head)); + if (b->free_lists == NULL) + goto fail1; + + b->links = vmalloc(size * sizeof(struct list_head)); + if (b->links == NULL) + goto fail2; + + b->orders = vmalloc(size); + if (b->orders == NULL) + goto fail3; + + memset(b->links, 0, size * sizeof(struct list_head)); + + for (o = 0; o <= b->order; ++o) + INIT_LIST_HEAD(b->free_lists + o); + + efrm_buddy_free_list_add(b, b->order, 0); + + return 0; + +fail3: + vfree(b->links); +fail2: + vfree(b->free_lists); +fail1: + return -ENOMEM; +} + +void efrm_buddy_dtor(struct efrm_buddy_allocator *b) +{ + EFRM_ASSERT(b); + + vfree(b->free_lists); + vfree(b->links); + vfree(b->orders); +} + +int efrm_buddy_alloc(struct efrm_buddy_allocator *b, unsigned order) +{ + unsigned smallest; + unsigned addr; + + DEBUG_ALLOC(EFRM_NOTICE("%s(%u)", __func__, order)); + EFRM_ASSERT(b); + + /* Find smallest chunk that is big enough. ?? Can optimise this by + ** keeping array of pointers to smallest chunk for each order. + */ + smallest = order; + while (smallest <= b->order && + efrm_buddy_free_list_empty(b, smallest)) + ++smallest; + + if (smallest > b->order) { + DEBUG_ALLOC(EFRM_NOTICE + ("buddy - alloc order %d failed - max order %d", + order, b->order);); + return -ENOMEM; + } + + /* Split blocks until we get one of the correct size. 
*/ + addr = efrm_buddy_free_list_pop(b, smallest); + + DEBUG_ALLOC(EFRM_NOTICE("buddy - alloc %x order %d cut from order %d", + addr, order, smallest);); + while (smallest-- > order) + efrm_buddy_free_list_add(b, smallest, addr + (1 << smallest)); + + EFRM_DO_DEBUG(b->orders[addr] = (uint8_t) order); + + EFRM_ASSERT(addr < 1u << b->order); + return addr; +} + +void +efrm_buddy_free(struct efrm_buddy_allocator *b, unsigned addr, + unsigned order) +{ + unsigned buddy_addr; + + DEBUG_ALLOC(EFRM_NOTICE("%s(%u, %u)", __func__, addr, order)); + EFRM_ASSERT(b); + EFRM_ASSERT(order <= b->order); + EFRM_ASSERT((unsigned long)addr + ((unsigned long)1 << order) <= + (unsigned long)1 << b->order); + EFRM_ASSERT(!efrm_buddy_addr_in_free_list(b, addr)); + EFRM_ASSERT(b->orders[addr] == order); + + /* merge free blocks */ + while (order < b->order) { + buddy_addr = addr ^ (1 << order); + if (!efrm_buddy_addr_in_free_list(b, buddy_addr) || + b->orders[buddy_addr] != order) + break; + efrm_buddy_free_list_del(b, buddy_addr); + if (buddy_addr < addr) + addr = buddy_addr; + ++order; + } + + DEBUG_ALLOC(EFRM_NOTICE + ("buddy - free %x merged into order %d", addr, order);); + efrm_buddy_free_list_add(b, order, addr); +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/buffer_table.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/buffer_table.c @@ -0,0 +1,209 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains abstraction of the buffer table on the NIC. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* +** Might be worth keeping a bitmap of which entries are clear. Then we +** wouldn't need to clear them all again when we free an allocation. +*/ + +#include +#include +#include +#include +#include + +/*! Comment? */ +struct efrm_buffer_table { + spinlock_t lock; + struct efrm_buddy_allocator buddy; +}; + +/* Efab buffer state. 
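efrm_buffer_table_ctor() below reserves the entries outside [low, high) by first allocating every order-0 block from the buddy and then freeing back only the usable window. A standalone sketch of the same reserve-by-alloc-then-free trick, using a plain bitmap in place of the buddy:

#include <stdio.h>
#include <stdbool.h>

#define TABLE_SIZE 16   /* power-of-two span covering `high` */

static bool busy[TABLE_SIZE];

/* Mark the whole power-of-two span busy, then release only [low, high),
 * so entries outside the usable window can never be handed out. */
static void table_ctor(unsigned low, unsigned high)
{
        for (unsigned i = 0; i < TABLE_SIZE; i++)
                busy[i] = true;         /* "allocate" everything */
        for (unsigned i = low; i < high; i++)
                busy[i] = false;        /* free just the usable range */
}

int main(void)
{
        table_ctor(2, 12);
        printf("entry 0 usable: %d, entry 5 usable: %d\n",
               !busy[0], !busy[5]);
        return 0;
}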
*/ +static struct efrm_buffer_table efrm_buffers; + +int efrm_buffer_table_ctor(unsigned low, unsigned high) +{ + int log2_n_entries, rc, i; + + EFRM_ASSERT(high > 0); + EFRM_ASSERT(low < high); + + EFRM_TRACE("%s: low=%u high=%u", __func__, low, high); + EFRM_NOTICE("%s: low=%u high=%u", __func__, low, high); + + log2_n_entries = fls(high - 1); + + rc = efrm_buddy_ctor(&efrm_buffers.buddy, log2_n_entries); + if (rc < 0) { + EFRM_ERR("efrm_buffer_table_ctor: efrm_buddy_ctor(%d) " + "failed (%d)", log2_n_entries, rc); + return rc; + } + for (i = 0; i < (1 << log2_n_entries); ++i) { + rc = efrm_buddy_alloc(&efrm_buffers.buddy, 0); + EFRM_ASSERT(rc >= 0); + EFRM_ASSERT(rc < (1 << log2_n_entries)); + } + for (i = low; i < (int) high; ++i) + efrm_buddy_free(&efrm_buffers.buddy, i, 0); + + spin_lock_init(&efrm_buffers.lock); + + EFRM_TRACE("%s: done", __func__); + + return 0; +} + +void efrm_buffer_table_dtor(void) +{ + /* ?? debug check that all allocations have been freed? */ + + spin_lock_destroy(&efrm_buffers.lock); + efrm_buddy_dtor(&efrm_buffers.buddy); + + EFRM_TRACE("%s: done", __func__); +} + +/**********************************************************************/ + +int +efrm_buffer_table_alloc(unsigned order, + struct efhw_buffer_table_allocation *a) +{ + irq_flags_t lock_flags; + int rc; + + EFRM_ASSERT(&efrm_buffers.buddy); + EFRM_ASSERT(a); + + /* Round up to multiple of two, as the buffer clear logic works in + * pairs when not in "full" mode. */ + order = max_t(unsigned, order, 1); + + spin_lock_irqsave(&efrm_buffers.lock, lock_flags); + rc = efrm_buddy_alloc(&efrm_buffers.buddy, order); + spin_unlock_irqrestore(&efrm_buffers.lock, lock_flags); + + if (rc < 0) { + EFRM_ERR("efrm_buffer_table_alloc: failed (n=%ld) rc %d", + 1ul << order, rc); + return rc; + } + + EFRM_TRACE("efrm_buffer_table_alloc: base=%d n=%ld", + rc, 1ul << order); + a->order = order; + a->base = (unsigned)rc; + return 0; +} + +void efrm_buffer_table_free(struct efhw_buffer_table_allocation *a) +{ + irq_flags_t lock_flags; + struct efhw_nic *nic; + int nic_i; + + EFRM_ASSERT(&efrm_buffers.buddy); + EFRM_ASSERT(a); + EFRM_ASSERT(a->base != -1); + EFRM_ASSERT((unsigned long)a->base + (1ul << a->order) <= + efrm_buddy_size(&efrm_buffers.buddy)); + + EFRM_TRACE("efrm_buffer_table_free: base=%d n=%ld", + a->base, (1ul << a->order)); + + EFRM_FOR_EACH_NIC(nic_i, nic) + efhw_nic_buffer_table_clear(nic, a->base, 1ul << a->order); + + spin_lock_irqsave(&efrm_buffers.lock, lock_flags); + efrm_buddy_free(&efrm_buffers.buddy, a->base, a->order); + spin_unlock_irqrestore(&efrm_buffers.lock, lock_flags); + + EFRM_DO_DEBUG(a->base = a->order = -1); +} + +/**********************************************************************/ + +void +efrm_buffer_table_set(struct efhw_buffer_table_allocation *a, + struct efhw_nic *nic, + unsigned i, dma_addr_t dma_addr, int owner) +{ + EFRM_ASSERT(a); + EFRM_ASSERT(i < (unsigned)1 << a->order); + + efhw_nic_buffer_table_set(nic, dma_addr, EFHW_NIC_PAGE_SIZE, + 0, owner, a->base + i); +} + + +int efrm_buffer_table_size(void) +{ + return efrm_buddy_size(&efrm_buffers.buddy); +} + +/**********************************************************************/ + +int +efrm_page_register(struct efhw_nic *nic, dma_addr_t dma_addr, int owner, + efhw_buffer_addr_t *buf_addr_out) +{ + struct efhw_buffer_table_allocation alloc; + int rc; + + rc = efrm_buffer_table_alloc(0, &alloc); + if (rc == 0) { + efrm_buffer_table_set(&alloc, nic, 0, dma_addr, owner); + efrm_buffer_table_commit(); + *buf_addr_out = 
EFHW_BUFFER_ADDR(alloc.base, 0); + } + return rc; +} +EXPORT_SYMBOL(efrm_page_register); + +void efrm_page_unregister(efhw_buffer_addr_t buf_addr) +{ + struct efhw_buffer_table_allocation alloc; + + alloc.order = 0; + alloc.base = EFHW_BUFFER_PAGE(buf_addr); + efrm_buffer_table_free(&alloc); +} +EXPORT_SYMBOL(efrm_page_unregister); + +void efrm_buffer_table_commit(void) +{ + struct efhw_nic *nic; + int nic_i; + + EFRM_FOR_EACH_NIC(nic_i, nic) + efhw_nic_buffer_table_commit(nic); +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/efx_vi_shm.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/efx_vi_shm.c @@ -0,0 +1,707 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides implementation of EFX VI API, used from Xen + * acceleration driver. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include "linux_resource_internal.h" +#include +#include +#include +#include +#include +#include +#include "kernel_compat.h" + +#if EFX_VI_STATIC_FILTERS +struct filter_list_t { + struct filter_list_t *next; + struct filter_resource *fres; +}; +#endif + +struct efx_vi_state { + struct vi_resource *vi_res; + + int ifindex; + struct efrm_client *efrm_client; + struct efhw_nic *nic; + + void (*callback_fn)(void *arg, int is_timeout); + void *callback_arg; + + struct completion flush_completion; + +#if EFX_VI_STATIC_FILTERS + struct filter_list_t fres[EFX_VI_STATIC_FILTERS]; + struct filter_list_t *free_fres; + struct filter_list_t *used_fres; +#endif +}; + +static void efx_vi_flush_complete(void *state_void) +{ + struct efx_vi_state *state = (struct efx_vi_state *)state_void; + + complete(&state->flush_completion); +} + +static inline int alloc_ep(struct efx_vi_state *state) +{ + int rc; + + rc = efrm_vi_resource_alloc(state->efrm_client, NULL, EFHW_VI_JUMBO_EN, + efx_vi_eventq_size, + FALCON_DMA_Q_DEFAULT_TX_SIZE, + FALCON_DMA_Q_DEFAULT_RX_SIZE, + 0, 0, &state->vi_res, NULL, NULL, NULL, + NULL); + if (rc < 0) { + EFRM_ERR("%s: ERROR efrm_vi_resource_alloc error %d", + __func__, rc); + return rc; + } + + efrm_vi_register_flush_callback(state->vi_res, &efx_vi_flush_complete, + (void *)state); + + return 0; +} + +static int free_ep(struct efx_vi_state *efx_state) +{ + efrm_vi_resource_release(efx_state->vi_res); + + return 0; +} + +#if EFX_VI_STATIC_FILTERS +static int efx_vi_alloc_static_filters(struct efx_vi_state *efx_state) +{ + int i; + int rc; + + efx_state->free_fres = 
efx_state->used_fres = NULL; + + for (i = 0; i < EFX_VI_STATIC_FILTERS; i++) { + rc = efrm_filter_resource_alloc(efx_state->vi_res, + &efx_state->fres[i].fres); + if (rc < 0) { + EFRM_ERR("%s: efrm_filter_resource_alloc failed: %d", + __func__, rc); + while (i > 0) { + i--; + efrm_filter_resource_release(efx_state-> + fres[i].fres); + } + efx_state->free_fres = NULL; + return rc; + } + efx_state->fres[i].next = efx_state->free_fres; + efx_state->free_fres = &efx_state->fres[i]; + } + + return 0; +} +#endif + +int efx_vi_alloc(struct efx_vi_state **vih_out, int ifindex) +{ + struct efx_vi_state *efx_state; + int rc; + + efx_state = kmalloc(sizeof(struct efx_vi_state), GFP_KERNEL); + + if (!efx_state) { + EFRM_ERR("%s: failed to allocate memory for efx_vi_state", + __func__); + rc = -ENOMEM; + goto fail; + } + + efx_state->ifindex = ifindex; + rc = efrm_client_get(ifindex, NULL, NULL, &efx_state->efrm_client); + if (rc < 0) { + EFRM_ERR("%s: efrm_client_get(%d) failed: %d", __func__, + ifindex, rc); + rc = -ENODEV; + goto fail_no_ifindex; + } + efx_state->nic = efrm_client_get_nic(efx_state->efrm_client); + + init_completion(&efx_state->flush_completion); + + /* basically allocate_pt_endpoint() */ + rc = alloc_ep(efx_state); + if (rc) { + EFRM_ERR("%s: alloc_ep failed: %d", __func__, rc); + goto fail_no_pt; + } +#if EFX_VI_STATIC_FILTERS + /* Statically allocate a set of filter resources - removes the + restriction on not being able to use efx_vi_filter() from + in_atomic() */ + rc = efx_vi_alloc_static_filters(efx_state); + if (rc) + goto fail_no_filters; +#endif + + *vih_out = efx_state; + + return 0; +#if EFX_VI_STATIC_FILTERS +fail_no_filters: + free_ep(efx_state); +#endif +fail_no_pt: + efrm_client_put(efx_state->efrm_client); +fail_no_ifindex: + kfree(efx_state); +fail: + return rc; +} +EXPORT_SYMBOL(efx_vi_alloc); + +void efx_vi_free(struct efx_vi_state *vih) +{ + struct efx_vi_state *efx_state = vih; + + /* TODO flush dma channels, init dma queues?. 
See ef_free_vnic() */ +#if EFX_VI_STATIC_FILTERS + int i; + + for (i = 0; i < EFX_VI_STATIC_FILTERS; i++) + efrm_filter_resource_release(efx_state->fres[i].fres); +#endif + + if (efx_state->vi_res) + free_ep(efx_state); + + efrm_client_put(efx_state->efrm_client); + + kfree(efx_state); +} +EXPORT_SYMBOL(efx_vi_free); + +void efx_vi_reset(struct efx_vi_state *vih) +{ + struct efx_vi_state *efx_state = vih; + + efrm_pt_flush(efx_state->vi_res); + + while (wait_for_completion_timeout(&efx_state->flush_completion, HZ) + == 0) + efrm_vi_resource_flush_retry(efx_state->vi_res); + + /* Bosch the eventq */ + efrm_eventq_reset(efx_state->vi_res); + return; +} +EXPORT_SYMBOL(efx_vi_reset); + +static void +efx_vi_eventq_callback(void *context, int is_timeout, struct efhw_nic *nic) +{ + struct efx_vi_state *efx_state = (struct efx_vi_state *)context; + + EFRM_ASSERT(efx_state->callback_fn); + + return efx_state->callback_fn(efx_state->callback_arg, is_timeout); +} + +int +efx_vi_eventq_register_callback(struct efx_vi_state *vih, + void (*callback)(void *context, int is_timeout), + void *context) +{ + struct efx_vi_state *efx_state = vih; + + efx_state->callback_fn = callback; + efx_state->callback_arg = context; + + /* Register the eventq timeout event callback */ + efrm_eventq_register_callback(efx_state->vi_res, + efx_vi_eventq_callback, efx_state); + + return 0; +} +EXPORT_SYMBOL(efx_vi_eventq_register_callback); + +int efx_vi_eventq_kill_callback(struct efx_vi_state *vih) +{ + struct efx_vi_state *efx_state = vih; + + if (efx_state->vi_res->evq_callback_fn) + efrm_eventq_kill_callback(efx_state->vi_res); + + efx_state->callback_fn = NULL; + efx_state->callback_arg = NULL; + + return 0; +} +EXPORT_SYMBOL(efx_vi_eventq_kill_callback); + +struct efx_vi_dma_map_state { + struct efhw_buffer_table_allocation bt_handle; + int n_pages; + dma_addr_t *dma_addrs; +}; + +int +efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages, + int n_pages, struct efx_vi_dma_map_state **dmh_out) +{ + struct efx_vi_state *efx_state = vih; + int order = fls(n_pages - 1), rc, i, evq_id; + dma_addr_t dma_addr; + struct efx_vi_dma_map_state *dm_state; + + if (n_pages != (1 << order)) { + EFRM_WARN("%s: Can only allocate buffers in power of 2 " + "sizes (not %d)", __func__, n_pages); + return -EINVAL; + } + + dm_state = kmalloc(sizeof(struct efx_vi_dma_map_state), GFP_KERNEL); + if (!dm_state) + return -ENOMEM; + + dm_state->dma_addrs = kmalloc(sizeof(dma_addr_t) * n_pages, + GFP_KERNEL); + if (!dm_state->dma_addrs) { + kfree(dm_state); + return -ENOMEM; + } + + rc = efrm_buffer_table_alloc(order, &dm_state->bt_handle); + if (rc < 0) { + kfree(dm_state->dma_addrs); + kfree(dm_state); + return rc; + } + + evq_id = EFRM_RESOURCE_INSTANCE(efx_state->vi_res->rs.rs_handle); + for (i = 0; i < n_pages; i++) { + /* TODO do we need to get_page() here ? 
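+	 *
+	 * (get_page() would take an extra reference on each page; the
+	 * caller is assumed to keep the pages resident for the lifetime
+	 * of the mapping.)
+	 *
+	 * Minimal usage sketch for this API, assuming "pages" holds
+	 * n_pages (a power of two) resident pages and omitting error
+	 * handling:
+	 *
+	 *   struct efx_vi_dma_map_state *dmh;
+	 *   unsigned buf_addr;
+	 *
+	 *   efx_vi_dma_map_pages(vih, pages, n_pages, &dmh);
+	 *   buf_addr = efx_vi_dma_get_map_addr(vih, dmh);
+	 *   ... hand buf_addr to the hardware ...
+	 *   efx_vi_dma_unmap_pages(vih, dmh);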
*/ + + dma_addr = pci_map_page(linux_efhw_nic(efx_state->nic)-> + pci_dev, pages[i], 0, PAGE_SIZE, + PCI_DMA_TODEVICE); + + efrm_buffer_table_set(&dm_state->bt_handle, efx_state->nic, + i, dma_addr, evq_id); + + dm_state->dma_addrs[i] = dma_addr; + + /* Would be nice to not have to call commit each time, but + * comment says there are hardware restrictions on how often + * you can go without it, so do this to be safe */ + efrm_buffer_table_commit(); + } + + dm_state->n_pages = n_pages; + + *dmh_out = dm_state; + + return 0; +} +EXPORT_SYMBOL(efx_vi_dma_map_pages); + +/* Function needed as Xen can't get pages for grants in dom0, but can + get dma address */ +int +efx_vi_dma_map_addrs(struct efx_vi_state *vih, + unsigned long long *bus_dev_addrs, + int n_pages, struct efx_vi_dma_map_state **dmh_out) +{ + struct efx_vi_state *efx_state = vih; + int order = fls(n_pages - 1), rc, i, evq_id; + dma_addr_t dma_addr; + struct efx_vi_dma_map_state *dm_state; + + if (n_pages != (1 << order)) { + EFRM_WARN("%s: Can only allocate buffers in power of 2 " + "sizes (not %d)", __func__, n_pages); + return -EINVAL; + } + + dm_state = kmalloc(sizeof(struct efx_vi_dma_map_state), GFP_KERNEL); + if (!dm_state) + return -ENOMEM; + + dm_state->dma_addrs = kmalloc(sizeof(dma_addr_t) * n_pages, + GFP_KERNEL); + if (!dm_state->dma_addrs) { + kfree(dm_state); + return -ENOMEM; + } + + rc = efrm_buffer_table_alloc(order, &dm_state->bt_handle); + if (rc < 0) { + kfree(dm_state->dma_addrs); + kfree(dm_state); + return rc; + } + + evq_id = EFRM_RESOURCE_INSTANCE(efx_state->vi_res->rs.rs_handle); +#if 0 + EFRM_WARN("%s: mapping %d pages to evq %d, bt_ids %d-%d\n", + __func__, n_pages, evq_id, + dm_state->bt_handle.base, + dm_state->bt_handle.base + n_pages); +#endif + for (i = 0; i < n_pages; i++) { + + dma_addr = (dma_addr_t)bus_dev_addrs[i]; + + efrm_buffer_table_set(&dm_state->bt_handle, efx_state->nic, + i, dma_addr, evq_id); + + dm_state->dma_addrs[i] = dma_addr; + + /* Would be nice to not have to call commit each time, but + * comment says there are hardware restrictions on how often + * you can go without it, so do this to be safe */ + efrm_buffer_table_commit(); + } + + dm_state->n_pages = n_pages; + + *dmh_out = dm_state; + + return 0; +} +EXPORT_SYMBOL(efx_vi_dma_map_addrs); + +void +efx_vi_dma_unmap_pages(struct efx_vi_state *vih, + struct efx_vi_dma_map_state *dmh) +{ + struct efx_vi_state *efx_state = vih; + struct efx_vi_dma_map_state *dm_state = + (struct efx_vi_dma_map_state *)dmh; + int i; + + efrm_buffer_table_free(&dm_state->bt_handle); + + for (i = 0; i < dm_state->n_pages; ++i) + pci_unmap_page(linux_efhw_nic(efx_state->nic)->pci_dev, + dm_state->dma_addrs[i], PAGE_SIZE, + PCI_DMA_TODEVICE); + + kfree(dm_state->dma_addrs); + kfree(dm_state); + + return; +} +EXPORT_SYMBOL(efx_vi_dma_unmap_pages); + +void +efx_vi_dma_unmap_addrs(struct efx_vi_state *vih, + struct efx_vi_dma_map_state *dmh) +{ + struct efx_vi_dma_map_state *dm_state = + (struct efx_vi_dma_map_state *)dmh; + + efrm_buffer_table_free(&dm_state->bt_handle); + + kfree(dm_state->dma_addrs); + kfree(dm_state); + + return; +} +EXPORT_SYMBOL(efx_vi_dma_unmap_addrs); + +unsigned +efx_vi_dma_get_map_addr(struct efx_vi_state *vih, + struct efx_vi_dma_map_state *dmh) +{ + struct efx_vi_dma_map_state *dm_state = + (struct efx_vi_dma_map_state *)dmh; + + return EFHW_BUFFER_ADDR(dm_state->bt_handle.base, 0); +} +EXPORT_SYMBOL(efx_vi_dma_get_map_addr); + +#if EFX_VI_STATIC_FILTERS +static int +get_filter(struct efx_vi_state *efx_state, + 
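+	   /* note: pthandle is accepted but not used by this helper */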
efrm_resource_handle_t pthandle, struct filter_resource **fres_out) +{ + struct filter_list_t *flist; + if (efx_state->free_fres == NULL) + return -ENOMEM; + else { + flist = efx_state->free_fres; + efx_state->free_fres = flist->next; + flist->next = efx_state->used_fres; + efx_state->used_fres = flist; + *fres_out = flist->fres; + return 0; + } +} +#endif + +static void +release_filter(struct efx_vi_state *efx_state, struct filter_resource *fres) +{ +#if EFX_VI_STATIC_FILTERS + struct filter_list_t *flist = efx_state->used_fres, *prev = NULL; + while (flist) { + if (flist->fres == fres) { + if (prev) + prev->next = flist->next; + else + efx_state->used_fres = flist->next; + flist->next = efx_state->free_fres; + efx_state->free_fres = flist; + return; + } + prev = flist; + flist = flist->next; + } + EFRM_ERR("%s: couldn't find filter", __func__); +#else + return efrm_filter_resource_release(fres); +#endif +} + +int +efx_vi_filter(struct efx_vi_state *vih, int protocol, + unsigned ip_addr_be32, int port_le16, + struct filter_resource_t **fh_out) +{ + struct efx_vi_state *efx_state = vih; + struct filter_resource *uninitialized_var(frs); + int rc; + +#if EFX_VI_STATIC_FILTERS + rc = get_filter(efx_state, efx_state->vi_res->rs.rs_handle, &frs); +#else + rc = efrm_filter_resource_alloc(efx_state->vi_res, &frs); +#endif + if (rc < 0) + return rc; + + /* Add the hardware filter. We pass in the source port and address + * as 0 (wildcard) to minimise the number of filters needed. */ + if (protocol == IPPROTO_TCP) { + rc = efrm_filter_resource_tcp_set(frs, 0, 0, ip_addr_be32, + port_le16); + } else { + rc = efrm_filter_resource_udp_set(frs, 0, 0, ip_addr_be32, + port_le16); + } + + *fh_out = (struct filter_resource_t *)frs; + + return rc; +} +EXPORT_SYMBOL(efx_vi_filter); + +int +efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh) +{ + struct efx_vi_state *efx_state = vih; + struct filter_resource *frs = (struct filter_resource *)fh; + int rc; + + rc = efrm_filter_resource_clear(frs); + release_filter(efx_state, frs); + + return rc; +} +EXPORT_SYMBOL(efx_vi_filter_stop); + +int +efx_vi_hw_resource_get_virt(struct efx_vi_state *vih, + struct efx_vi_hw_resource_metadata *mdata, + struct efx_vi_hw_resource *hw_res_array, + int *length) +{ + EFRM_NOTICE("%s: TODO!", __func__); + + return 0; +} +EXPORT_SYMBOL(efx_vi_hw_resource_get_virt); + +int +efx_vi_hw_resource_get_phys(struct efx_vi_state *vih, + struct efx_vi_hw_resource_metadata *mdata, + struct efx_vi_hw_resource *hw_res_array, + int *length) +{ + struct efx_vi_state *efx_state = vih; + struct linux_efhw_nic *lnic = linux_efhw_nic(efx_state->nic); + unsigned long phys = lnic->ctr_ap_pci_addr; + struct efrm_resource *ep_res = &efx_state->vi_res->rs; + unsigned ep_mmap_bytes; + int i; + + if (*length < EFX_VI_HW_RESOURCE_MAXSIZE) + return -EINVAL; + + mdata->nic_arch = efx_state->nic->devtype.arch; + mdata->nic_variant = efx_state->nic->devtype.variant; + mdata->nic_revision = efx_state->nic->devtype.revision; + + mdata->evq_order = + efx_state->vi_res->nic_info.evq_pages.iobuff.order; + mdata->evq_offs = efx_state->vi_res->nic_info.evq_pages.iobuff_off; + mdata->evq_capacity = efx_vi_eventq_size; + mdata->instance = EFRM_RESOURCE_INSTANCE(ep_res->rs_handle); + mdata->rx_capacity = FALCON_DMA_Q_DEFAULT_RX_SIZE; + mdata->tx_capacity = FALCON_DMA_Q_DEFAULT_TX_SIZE; + + ep_mmap_bytes = FALCON_DMA_Q_DEFAULT_MMAP; + EFRM_ASSERT(ep_mmap_bytes == PAGE_SIZE * 2); + +#ifndef NDEBUG + { + /* Sanity about doorbells */ + unsigned long 
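+	/* The assertions below check that, for this VI instance, the TX
+	 * and RX doorbells land in the same page and at the expected
+	 * offsets, so a single bell page can be handed out (see the
+	 * EFX_VI_HW_RESOURCE_BELLPAGE entry further down). */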
tx_dma_page_addr, rx_dma_page_addr; + + /* get rx doorbell address */ + rx_dma_page_addr = + phys + falcon_rx_dma_page_addr(mdata->instance); + /* get tx doorbell address */ + tx_dma_page_addr = + phys + falcon_tx_dma_page_addr(mdata->instance); + + /* Check the lower bits of the TX doorbell will be + * consistent. */ + EFRM_ASSERT((TX_DESC_UPD_REG_PAGE4_OFST & + FALCON_DMA_PAGE_MASK) == + (TX_DESC_UPD_REG_PAGE123K_OFST & + FALCON_DMA_PAGE_MASK)); + + /* Check the lower bits of the RX doorbell will be + * consistent. */ + EFRM_ASSERT((RX_DESC_UPD_REG_PAGE4_OFST & + FALCON_DMA_PAGE_MASK) == + (RX_DESC_UPD_REG_PAGE123K_OFST & + FALCON_DMA_PAGE_MASK)); + + /* Check that the doorbells will be in the same page. */ + EFRM_ASSERT((TX_DESC_UPD_REG_PAGE4_OFST & PAGE_MASK) == + (RX_DESC_UPD_REG_PAGE4_OFST & PAGE_MASK)); + + /* Check that the doorbells are in the same page. */ + EFRM_ASSERT((tx_dma_page_addr & PAGE_MASK) == + (rx_dma_page_addr & PAGE_MASK)); + + /* Check that the TX doorbell offset is correct. */ + EFRM_ASSERT((TX_DESC_UPD_REG_PAGE4_OFST & ~PAGE_MASK) == + (tx_dma_page_addr & ~PAGE_MASK)); + + /* Check that the RX doorbell offset is correct. */ + EFRM_ASSERT((RX_DESC_UPD_REG_PAGE4_OFST & ~PAGE_MASK) == + (rx_dma_page_addr & ~PAGE_MASK)); + } +#endif + + i = 0; + hw_res_array[i].type = EFX_VI_HW_RESOURCE_TXDMAQ; + hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; + hw_res_array[i].more_to_follow = 0; + hw_res_array[i].length = PAGE_SIZE; + hw_res_array[i].address = + (unsigned long)efx_state->vi_res->nic_info. + dmaq_pages[EFRM_VI_RM_DMA_QUEUE_TX].kva; + + i++; + hw_res_array[i].type = EFX_VI_HW_RESOURCE_RXDMAQ; + hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; + hw_res_array[i].more_to_follow = 0; + hw_res_array[i].length = PAGE_SIZE; + hw_res_array[i].address = + (unsigned long)efx_state->vi_res->nic_info. 
+ dmaq_pages[EFRM_VI_RM_DMA_QUEUE_RX].kva; + + i++; + hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQTIMER; + hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; + hw_res_array[i].more_to_follow = 0; + hw_res_array[i].length = PAGE_SIZE; + hw_res_array[i].address = + (unsigned long)phys + falcon_timer_page_addr(mdata->instance); + + /* NB EFX_VI_HW_RESOURCE_EVQPTR not used on Falcon */ + + i++; + switch (efx_state->nic->devtype.variant) { + case 'A': + hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQRPTR; + hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; + hw_res_array[i].more_to_follow = 0; + hw_res_array[i].length = PAGE_SIZE; + hw_res_array[i].address = (unsigned long)phys + + EVQ_RPTR_REG_OFST + + (FALCON_REGISTER128 * mdata->instance); + break; + case 'B': + hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET; + hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; + hw_res_array[i].more_to_follow = 0; + hw_res_array[i].length = PAGE_SIZE; + hw_res_array[i].address = + (unsigned long)FALCON_EVQ_RPTR_REG_P0; + break; + default: + EFRM_ASSERT(0); + break; + } + + i++; + hw_res_array[i].type = EFX_VI_HW_RESOURCE_EVQMEMKVA; + hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_IOBUFFER; + hw_res_array[i].more_to_follow = 0; + hw_res_array[i].length = PAGE_SIZE; + hw_res_array[i].address = (unsigned long)efx_state->vi_res-> + nic_info.evq_pages.iobuff.kva; + + i++; + hw_res_array[i].type = EFX_VI_HW_RESOURCE_BELLPAGE; + hw_res_array[i].mem_type = EFX_VI_HW_RESOURCE_PERIPHERAL; + hw_res_array[i].more_to_follow = 0; + hw_res_array[i].length = PAGE_SIZE; + hw_res_array[i].address = + (unsigned long)(phys + + falcon_tx_dma_page_addr(mdata->instance)) + >> PAGE_SHIFT; + + i++; + + EFRM_ASSERT(i <= *length); + + *length = i; + + return 0; +} +EXPORT_SYMBOL(efx_vi_hw_resource_get_phys); --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/resources.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/resources.c @@ -0,0 +1,94 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains resource managers initialisation functions. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + +int +efrm_resources_init(const struct vi_resource_dimensions *vi_res_dim, + int buffer_table_min, int buffer_table_lim) +{ + int i, rc; + + rc = efrm_buffer_table_ctor(buffer_table_min, buffer_table_lim); + if (rc != 0) + return rc; + + /* Create resources in the correct order */ + for (i = 0; i < EFRM_RESOURCE_NUM; ++i) { + struct efrm_resource_manager **rmp = &efrm_rm_table[i]; + + EFRM_ASSERT(*rmp == NULL); + switch (i) { + case EFRM_RESOURCE_VI: + rc = efrm_create_vi_resource_manager(rmp, + vi_res_dim); + break; + case EFRM_RESOURCE_FILTER: + rc = efrm_create_filter_resource_manager(rmp); + break; + case EFRM_RESOURCE_IOBUFSET: + rc = efrm_create_iobufset_resource_manager(rmp); + break; + default: + rc = 0; + break; + } + + if (rc < 0) { + EFRM_ERR("%s: failed type=%d (%d)", + __func__, i, rc); + efrm_buffer_table_dtor(); + return rc; + } + } + + return 0; +} + +void efrm_resources_fini(void) +{ + int i; + + for (i = EFRM_RESOURCE_NUM - 1; i >= 0; --i) + if (efrm_rm_table[i]) { + efrm_resource_manager_dtor(efrm_rm_table[i]); + efrm_rm_table[i] = NULL; + } + + efrm_buffer_table_dtor(); +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/iopage.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/iopage.c @@ -0,0 +1,103 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides Linux-specific implementation for iopage API used + * from efhw library. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include "kernel_compat.h" +#include /* for dma_addr_t */ + +int efhw_iopage_alloc(struct efhw_nic *nic, struct efhw_iopage *p) +{ + struct linux_efhw_nic *lnic = linux_efhw_nic(nic); + dma_addr_t handle; + void *kva; + + kva = efrm_pci_alloc_consistent(lnic->pci_dev, PAGE_SIZE, + &handle); + if (kva == 0) + return -ENOMEM; + + EFHW_ASSERT((handle & ~PAGE_MASK) == 0); + + memset((void *)kva, 0, PAGE_SIZE); + efhw_page_init_from_va(&p->p, kva); + + p->dma_addr = handle; + + return 0; +} + +void efhw_iopage_free(struct efhw_nic *nic, struct efhw_iopage *p) +{ + struct linux_efhw_nic *lnic = linux_efhw_nic(nic); + EFHW_ASSERT(efhw_page_is_valid(&p->p)); + + efrm_pci_free_consistent(lnic->pci_dev, PAGE_SIZE, + efhw_iopage_ptr(p), p->dma_addr); +} + +int +efhw_iopages_alloc(struct efhw_nic *nic, struct efhw_iopages *p, + unsigned order) +{ + unsigned bytes = 1u << (order + PAGE_SHIFT); + struct linux_efhw_nic *lnic = linux_efhw_nic(nic); + dma_addr_t handle; + caddr_t addr; + int gfp_flag; + + /* Set __GFP_COMP if available to make reference counting work. + * This is recommended here: + * http://www.forbiddenweb.org/viewtopic.php?id=83167&page=4#348331 + */ + gfp_flag = ((in_atomic() ? GFP_ATOMIC : GFP_KERNEL) | __GFP_COMP); + addr = efrm_dma_alloc_coherent(&lnic->pci_dev->dev, bytes, &handle, + gfp_flag); + if (addr == NULL) + return -ENOMEM; + + EFHW_ASSERT((handle & ~PAGE_MASK) == 0); + + p->order = order; + p->dma_addr = handle; + p->kva = addr; + + return 0; +} + +void efhw_iopages_free(struct efhw_nic *nic, struct efhw_iopages *p) +{ + unsigned bytes = 1u << (p->order + PAGE_SHIFT); + struct linux_efhw_nic *lnic = linux_efhw_nic(nic); + + efrm_dma_free_coherent(&lnic->pci_dev->dev, bytes, + (void *)p->kva, p->dma_addr); +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/driverlink_new.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/driverlink_new.c @@ -0,0 +1,260 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains driverlink code which interacts with the sfc network + * driver. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include "linux_resource_internal.h" +#include "driverlink_api.h" +#include "kernel_compat.h" +#include + +#include +#include +#include + +/* The DL driver and associated calls */ +static int efrm_dl_probe(struct efx_dl_device *efrm_dev, + const struct net_device *net_dev, + const struct efx_dl_device_info *dev_info, + const char *silicon_rev); + +static void efrm_dl_remove(struct efx_dl_device *efrm_dev); + +static void efrm_dl_reset_suspend(struct efx_dl_device *efrm_dev); + +static void efrm_dl_reset_resume(struct efx_dl_device *efrm_dev, int ok); + +static void efrm_dl_mtu_changed(struct efx_dl_device *, int); +static void efrm_dl_event_falcon(struct efx_dl_device *efx_dev, void *p_event); + +static struct efx_dl_driver efrm_dl_driver = { + .name = "resource", + .probe = efrm_dl_probe, + .remove = efrm_dl_remove, + .reset_suspend = efrm_dl_reset_suspend, + .reset_resume = efrm_dl_reset_resume +}; + +static void +init_vi_resource_dimensions(struct vi_resource_dimensions *rd, + const struct efx_dl_falcon_resources *res) +{ + rd->evq_timer_min = res->evq_timer_min; + rd->evq_timer_lim = res->evq_timer_lim; + rd->evq_int_min = res->evq_int_min; + rd->evq_int_lim = res->evq_int_lim; + rd->rxq_min = res->rxq_min; + rd->rxq_lim = res->rxq_lim; + rd->txq_min = res->txq_min; + rd->txq_lim = res->txq_lim; + EFRM_TRACE + ("Using evq_int(%d-%d) evq_timer(%d-%d) RXQ(%d-%d) TXQ(%d-%d)", + res->evq_int_min, res->evq_int_lim, res->evq_timer_min, + res->evq_timer_lim, res->rxq_min, res->rxq_lim, res->txq_min, + res->txq_lim); +} + +static int +efrm_dl_probe(struct efx_dl_device *efrm_dev, + const struct net_device *net_dev, + const struct efx_dl_device_info *dev_info, + const char *silicon_rev) +{ + struct vi_resource_dimensions res_dim; + struct efx_dl_falcon_resources *res; + struct linux_efhw_nic *lnic; + struct pci_dev *dev; + struct efhw_nic *nic; + unsigned probe_flags = 0; + int non_irq_evq; + int rc; + + efrm_dev->priv = NULL; + + efx_dl_search_device_info(dev_info, EFX_DL_FALCON_RESOURCES, + struct efx_dl_falcon_resources, + hdr, res); + + if (res == NULL) { + EFRM_ERR("%s: Unable to find falcon driverlink resources", + __func__); + return -EINVAL; + } + + if (res->flags & EFX_DL_FALCON_USE_MSI) + probe_flags |= NIC_FLAG_TRY_MSI; + + dev = efrm_dev->pci_dev; + if (res->flags & EFX_DL_FALCON_DUAL_FUNC) { + unsigned vendor = dev->vendor; + EFRM_ASSERT(dev->bus != NULL); + dev = NULL; + + while ((dev = pci_get_device(vendor, FALCON_S_DEVID, dev)) + != NULL) { + EFRM_ASSERT(dev->bus != NULL); + /* With PCIe (since it's point to point) + * the slot ID is usually 0 and + * the bus ID changes NIC to NIC, so we really + * need to check both. 
*/ + if (PCI_SLOT(dev->devfn) == + PCI_SLOT(efrm_dev->pci_dev->devfn) + && dev->bus->number == + efrm_dev->pci_dev->bus->number) + break; + } + if (dev == NULL) { + EFRM_ERR("%s: Unable to find falcon secondary " + "PCI device.", __func__); + return -ENODEV; + } + pci_dev_put(dev); + } + + init_vi_resource_dimensions(&res_dim, res); + + EFRM_ASSERT(res_dim.evq_timer_lim > res_dim.evq_timer_min); + res_dim.evq_timer_lim--; + non_irq_evq = res_dim.evq_timer_lim; + + rc = efrm_nic_add(dev, probe_flags, net_dev->dev_addr, &lnic, + res->biu_lock, + res->buffer_table_min, res->buffer_table_lim, + non_irq_evq, &res_dim); + if (rc != 0) + return rc; + + nic = &lnic->efrm_nic.efhw_nic; + nic->mtu = net_dev->mtu + ETH_HLEN; + nic->net_driver_dev = efrm_dev; + nic->ifindex = net_dev->ifindex; +#ifdef CONFIG_NET_NS + nic->nd_net = net_dev->nd_net; +#endif + efrm_dev->priv = nic; + + /* Register a callback so we're told when MTU changes. + * We dynamically allocate efx_dl_callbacks, because + * the callbacks that we want depends on the NIC type. + */ + lnic->dl_callbacks = + kmalloc(sizeof(struct efx_dl_callbacks), GFP_KERNEL); + if (!lnic->dl_callbacks) { + EFRM_ERR("Out of memory (%s)", __func__); + efrm_nic_del(lnic); + return -ENOMEM; + } + memset(lnic->dl_callbacks, 0, sizeof(*lnic->dl_callbacks)); + lnic->dl_callbacks->mtu_changed = efrm_dl_mtu_changed; + + if ((res->flags & EFX_DL_FALCON_DUAL_FUNC) == 0) { + /* Net driver receives all management events. + * Register a callback to receive the ones + * we're interested in. */ + lnic->dl_callbacks->event = efrm_dl_event_falcon; + } + + rc = efx_dl_register_callbacks(efrm_dev, lnic->dl_callbacks); + if (rc < 0) { + EFRM_ERR("%s: efx_dl_register_callbacks failed (%d)", + __func__, rc); + kfree(lnic->dl_callbacks); + efrm_nic_del(lnic); + return rc; + } + + return 0; +} + +/* When we unregister ourselves on module removal, this function will be + * called for all the devices we claimed */ +static void efrm_dl_remove(struct efx_dl_device *efrm_dev) +{ + struct efhw_nic *nic = efrm_dev->priv; + struct linux_efhw_nic *lnic = linux_efhw_nic(nic); + EFRM_TRACE("%s called", __func__); + if (lnic->dl_callbacks) { + efx_dl_unregister_callbacks(efrm_dev, lnic->dl_callbacks); + kfree(lnic->dl_callbacks); + } + if (efrm_dev->priv) + efrm_nic_del(lnic); + EFRM_TRACE("%s OK", __func__); +} + +static void efrm_dl_reset_suspend(struct efx_dl_device *efrm_dev) +{ + EFRM_NOTICE("%s:", __func__); +} + +static void efrm_dl_reset_resume(struct efx_dl_device *efrm_dev, int ok) +{ + EFRM_NOTICE("%s: ok=%d", __func__, ok); +} + +int efrm_driverlink_register(void) +{ + EFRM_TRACE("%s:", __func__); + return efx_dl_register_driver(&efrm_dl_driver); +} + +void efrm_driverlink_unregister(void) +{ + EFRM_TRACE("%s:", __func__); + efx_dl_unregister_driver(&efrm_dl_driver); +} + +static void efrm_dl_mtu_changed(struct efx_dl_device *efx_dev, int mtu) +{ + struct efhw_nic *nic = efx_dev->priv; + + ASSERT_RTNL(); /* Since we're looking at efx_dl_device::port_net_dev */ + + EFRM_TRACE("%s: old=%d new=%d", __func__, nic->mtu, mtu + ETH_HLEN); + /* If this happened we must have agreed to it above */ + nic->mtu = mtu + ETH_HLEN; +} + +static void efrm_dl_event_falcon(struct efx_dl_device *efx_dev, void *p_event) +{ + struct efhw_nic *nic = efx_dev->priv; + struct linux_efhw_nic *lnic = linux_efhw_nic(nic); + efhw_event_t *ev = p_event; + + switch (FALCON_EVENT_CODE(ev)) { + case FALCON_EVENT_CODE_CHAR: + falcon_handle_char_event(nic, lnic->ev_handlers, ev); + break; + default: + 
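+		/* only CHAR events are consumed here; anything else is
+		 * unexpected, so just log it */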
EFRM_WARN("%s: unknown event type=%x", __func__, + (unsigned)FALCON_EVENT_CODE(ev)); + break; + } +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/vi_resource_manager.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/vi_resource_manager.c @@ -0,0 +1,231 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains the VI resource manager. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include "efrm_internal.h" + + +int efrm_pt_pace(struct vi_resource *virs, unsigned int val) +{ + struct efhw_nic *nic = virs->rs.rs_client->nic; + int instance; + + EFRM_RESOURCE_ASSERT_VALID(&virs->rs, 0); + instance = EFRM_RESOURCE_INSTANCE(virs->rs.rs_handle); + falcon_nic_pace(nic, instance, val); + EFRM_TRACE("%s[%d]=%d DONE", __func__, instance, val); + return 0; +} +EXPORT_SYMBOL(efrm_pt_pace); + +/*** Resource manager creation/destruction *******************************/ + +static void efrm_vi_rm_dtor(struct efrm_resource_manager *rm); + +static int +efrm_create_or_destroy_vi_resource_manager( + struct efrm_resource_manager **rm_in_out, + const struct vi_resource_dimensions *dims, + bool destroy) +{ + struct vi_resource *virs; + struct list_head *pos, *temp; + struct list_head flush_pending; + irq_flags_t lock_flags; + int rc; + unsigned dmaq_min, dmaq_lim; + + EFRM_ASSERT(rm_in_out); + + if (destroy) + goto destroy; + + EFRM_ASSERT(dims); + EFRM_NOTICE("vi_resource_manager: evq_int=%u-%u evq_timer=%u-%u", + dims->evq_int_min, dims->evq_int_lim, + dims->evq_timer_min, dims->evq_timer_lim); + EFRM_NOTICE("vi_resource_manager: rxq=%u-%u txq=%u-%u", + dims->rxq_min, dims->rxq_lim, + dims->txq_min, dims->txq_lim); + + efrm_vi_manager = kmalloc(sizeof(*efrm_vi_manager), GFP_KERNEL); + if (efrm_vi_manager == NULL) { + rc = -ENOMEM; + goto fail_alloc; + } + + memset(efrm_vi_manager, 0, sizeof(*efrm_vi_manager)); + + efrm_vi_manager->iscsi_dmaq_instance_is_free = true; + + dmaq_min = max(dims->rxq_min, dims->txq_min); + dmaq_lim = min(dims->rxq_lim, dims->txq_lim); + + efrm_vi_manager->with_timer_base = + max(dmaq_min, dims->evq_timer_min); + efrm_vi_manager->with_timer_limit = + min(dmaq_lim, dims->evq_timer_lim); + rc = 
efrm_kfifo_id_ctor(&efrm_vi_manager->instances_with_timer, + efrm_vi_manager->with_timer_base, + efrm_vi_manager->with_timer_limit, + &efrm_vi_manager->rm.rm_lock); + if (rc < 0) + goto fail_with_timer_id_pool; + + efrm_vi_manager->with_interrupt_base = + max(dmaq_min, dims->evq_int_min); + efrm_vi_manager->with_interrupt_limit = + min(dmaq_lim, dims->evq_int_lim); + efrm_vi_manager->with_interrupt_limit = + max(efrm_vi_manager->with_interrupt_limit, + efrm_vi_manager->with_interrupt_base); + rc = efrm_kfifo_id_ctor(&efrm_vi_manager->instances_with_interrupt, + efrm_vi_manager->with_interrupt_base, + efrm_vi_manager->with_interrupt_limit, + &efrm_vi_manager->rm.rm_lock); + if (rc < 0) + goto fail_with_int_id_pool; + + INIT_LIST_HEAD(&efrm_vi_manager->rx_flush_waiting_list); + INIT_LIST_HEAD(&efrm_vi_manager->rx_flush_outstanding_list); + INIT_LIST_HEAD(&efrm_vi_manager->tx_flush_outstanding_list); + efrm_vi_manager->rx_flush_outstanding_count = 0; + + INIT_LIST_HEAD(&efrm_vi_manager->close_pending); + efrm_vi_manager->workqueue = create_workqueue("sfc_vi"); + if (efrm_vi_manager->workqueue == NULL) + goto fail_create_workqueue; + INIT_WORK(&efrm_vi_manager->work_item, efrm_vi_rm_delayed_free); + + /* NB. This must be the last step to avoid things getting tangled. + * efrm_resource_manager_dtor calls the vi_rm_dtor which ends up in + * this function. */ + rc = efrm_resource_manager_ctor(&efrm_vi_manager->rm, efrm_vi_rm_dtor, + "VI", EFRM_RESOURCE_VI); + if (rc < 0) + goto fail_rm_ctor; + + *rm_in_out = &efrm_vi_manager->rm; + return 0; + +destroy: + rc = 0; + EFRM_RESOURCE_MANAGER_ASSERT_VALID(*rm_in_out); + + /* Abort outstanding flushes. Note, a VI resource can be on more + * than one of these lists. We handle this by starting with the TX + * list and then append VIs to this list if they aren't on the TX + * list already. A VI is on the TX flush list if tx_flushing + * is not empty. */ + spin_lock_irqsave(&efrm_vi_manager->rm.rm_lock, lock_flags); + + list_replace_init(&efrm_vi_manager->tx_flush_outstanding_list, + &flush_pending); + + list_for_each_safe(pos, temp, + &efrm_vi_manager->rx_flush_waiting_list) { + virs = container_of(pos, struct vi_resource, rx_flush_link); + + list_del(&virs->rx_flush_link); + if (virs->tx_flushing == 0) + list_add_tail(&virs->tx_flush_link, &flush_pending); + } + + list_for_each_safe(pos, temp, + &efrm_vi_manager->rx_flush_outstanding_list) { + virs = container_of(pos, struct vi_resource, rx_flush_link); + + list_del(&virs->rx_flush_link); + if (virs->tx_flushing == 0) + list_add_tail(&virs->tx_flush_link, &flush_pending); + } + + spin_unlock_irqrestore(&efrm_vi_manager->rm.rm_lock, lock_flags); + + while (!list_empty(&flush_pending)) { + virs = + list_entry(list_pop(&flush_pending), struct vi_resource, + tx_flush_link); + EFRM_TRACE("%s: found PT endpoint " EFRM_RESOURCE_FMT + " with flush pending [Tx=0x%x, Rx=0x%x, RxO=0x%x]", + __func__, + EFRM_RESOURCE_PRI_ARG(virs->rs.rs_handle), + virs->tx_flushing, + virs->rx_flushing, + virs->rx_flush_outstanding); + efrm_vi_rm_free_flushed_resource(virs); + } + +fail_rm_ctor: + + /* Complete outstanding closes. 
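+	 * destroy_workqueue() flushes work that is still queued (the
+	 * delayed frees scheduled via close_pending) before the queue
+	 * itself is torn down.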
*/ + destroy_workqueue(efrm_vi_manager->workqueue); +fail_create_workqueue: + EFRM_ASSERT(list_empty(&efrm_vi_manager->close_pending)); + kfifo_vfree(efrm_vi_manager->instances_with_interrupt); +fail_with_int_id_pool: + + kfifo_vfree(efrm_vi_manager->instances_with_timer); +fail_with_timer_id_pool: + + if (destroy) + return 0; + + EFRM_DO_DEBUG(memset(efrm_vi_manager, 0, sizeof(*efrm_vi_manager))); + kfree(efrm_vi_manager); +fail_alloc: + + *rm_in_out = NULL; + EFRM_ERR("%s: failed rc=%d", __func__, rc); + return rc; +} + +int +efrm_create_vi_resource_manager(struct efrm_resource_manager **rm_out, + const struct vi_resource_dimensions *dims) +{ + return efrm_create_or_destroy_vi_resource_manager(rm_out, dims, false); +} + +static void efrm_vi_rm_dtor(struct efrm_resource_manager *rm) +{ + efrm_create_or_destroy_vi_resource_manager(&rm, NULL, true); +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/kernel_compat.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/kernel_compat.c @@ -0,0 +1,118 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides compatibility layer for various Linux kernel versions + * (starting from 2.6.9 RHEL kernel). + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#define IN_KERNEL_COMPAT_C +#include +#include +#include "kernel_compat.h" + +/* Set this to 1 to enable very basic counting of iopage(s) allocations, then + * call dump_iopage_counts() to show the number of current allocations of + * orders 0-7. 
+ */ +#define EFRM_IOPAGE_COUNTS_ENABLED 0 + + +/**************************************************************************** + * + * allocate a buffer suitable for DMA to/from the NIC + * + ****************************************************************************/ + +#if EFRM_IOPAGE_COUNTS_ENABLED + +static int iopage_counts[8]; + +void dump_iopage_counts(void) +{ + EFRM_NOTICE("iopage counts: %d %d %d %d %d %d %d %d", iopage_counts[0], + iopage_counts[1], iopage_counts[2], iopage_counts[3], + iopage_counts[4], iopage_counts[5], iopage_counts[6], + iopage_counts[7]); +} + +#endif + + + +/*********** pci_alloc_consistent / pci_free_consistent ***********/ + +void *efrm_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, int flag) +{ + void *ptr; + unsigned order; + + order = __ffs(size/PAGE_SIZE); + EFRM_ASSERT(size == (PAGE_SIZE< + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include + +#ifndef NDEBUG +#include +#include +#include + +void +efrm_resource_manager_assert_valid(struct efrm_resource_manager *rm, + const char *file, int line) +{ + _EFRM_ASSERT(rm, file, line); + _EFRM_ASSERT(rm->rm_name, file, line); + _EFRM_ASSERT(rm->rm_type < EFRM_RESOURCE_NUM, file, line); + _EFRM_ASSERT(rm->rm_dtor, file, line); +} +EXPORT_SYMBOL(efrm_resource_manager_assert_valid); + +/* + * \param rs resource to validate + * \param ref_count_is_zero One of 3 values + * > 0 - check ref count is zero + * = 0 - check ref count is non-zero + * < 0 - ref count could be any value + */ +void +efrm_resource_assert_valid(struct efrm_resource *rs, int ref_count_is_zero, + const char *file, int line) +{ + struct efrm_resource_manager *rm; + + _EFRM_ASSERT(rs, file, line); + + if (ref_count_is_zero >= 0) { + if (!(ref_count_is_zero || rs->rs_ref_count > 0) + || !(!ref_count_is_zero || rs->rs_ref_count == 0)) + EFRM_WARN("%s: check %szero ref=%d " EFRM_RESOURCE_FMT, + __func__, + ref_count_is_zero == 0 ? 
"non-" : "", + rs->rs_ref_count, + EFRM_RESOURCE_PRI_ARG(rs->rs_handle)); + + _EFRM_ASSERT(!(ref_count_is_zero == 0) || + rs->rs_ref_count != 0, file, line); + _EFRM_ASSERT(!(ref_count_is_zero > 0) || + rs->rs_ref_count == 0, file, line); + } + + rm = efrm_rm_table[EFRM_RESOURCE_TYPE(rs->rs_handle)]; + efrm_resource_manager_assert_valid(rm, file, line); +} +EXPORT_SYMBOL(efrm_resource_assert_valid); + +#endif --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/eventq.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/eventq.c @@ -0,0 +1,321 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains event queue support. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include + +#define KEVENTQ_MAGIC 0x07111974 + +/*! Helper function to allocate the iobuffer needed by an eventq + * - it ensures the eventq has the correct alignment for the NIC + * + * \param rm Event-queue resource manager + * \param instance Event-queue instance (index) + * \param buf_bytes Requested size of eventq + * \return < 0 if iobuffer allocation fails + */ +int +efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic, + struct eventq_resource_hardware *h, + int evq_instance, unsigned buf_bytes) +{ + unsigned int page_order; + int rc; + + /* Allocate an iobuffer. */ + page_order = get_order(buf_bytes); + + h->iobuff_off = 0; + + EFHW_TRACE("allocating eventq size %x", + 1u << (page_order + PAGE_SHIFT)); + rc = efhw_iopages_alloc(nic, &h->iobuff, page_order); + if (rc < 0) { + EFHW_WARN("%s: failed to allocate %u pages", + __func__, 1u << page_order); + return rc; + } + + /* Set the eventq pages to match EFHW_CLEAR_EVENT() */ + if (EFHW_CLEAR_EVENT_VALUE) + memset(efhw_iopages_ptr(&h->iobuff) + h->iobuff_off, + EFHW_CLEAR_EVENT_VALUE, (1u << page_order) * PAGE_SIZE); + + EFHW_TRACE("%s: allocated %u pages", __func__, 1u << (page_order)); + + /* For Falcon the NIC is programmed with the base buffer address of a + * contiguous region of buffer space. This means that larger than a + * PAGE event queues can be expected to allocate even when the host's + * physical memory is fragmented */ + EFHW_ASSERT(efhw_nic_have_hw(nic)); + EFHW_ASSERT(page_order <= h->buf_tbl_alloc.order); + + /* Initialise the buffer table entries. 
*/ + falcon_nic_buffer_table_set_n(nic, h->buf_tbl_alloc.base, + efhw_iopages_dma_addr(&h->iobuff) + + h->iobuff_off, EFHW_NIC_PAGE_SIZE, 0, + 1 << page_order, 0); + + if (evq_instance >= FALCON_EVQ_TBL_RESERVED) + falcon_nic_buffer_table_confirm(nic); + return 0; +} + +/********************************************************************** + * Kernel event queue management. + */ + +/* Values for [struct efhw_keventq::lock] field. */ +#define KEVQ_UNLOCKED 0 +#define KEVQ_LOCKED 1 +#define KEVQ_RECHECK 2 + +int +efhw_keventq_ctor(struct efhw_nic *nic, int instance, + struct efhw_keventq *evq, + struct efhw_ev_handler *ev_handlers) +{ + int rc; + unsigned buf_bytes = evq->hw.capacity * sizeof(efhw_event_t); + + evq->instance = instance; + evq->ev_handlers = ev_handlers; + + /* allocate an IObuffer for the eventq */ + rc = efhw_nic_event_queue_alloc_iobuffer(nic, &evq->hw, evq->instance, + buf_bytes); + if (rc < 0) + return rc; + + /* Zero the timer-value for this queue. + AND Tell the nic about the event queue. */ + efhw_nic_event_queue_enable(nic, evq->instance, evq->hw.capacity, + efhw_iopages_dma_addr(&evq->hw.iobuff) + + evq->hw.iobuff_off, + evq->hw.buf_tbl_alloc.base, + 1 /* interrupting */); + + evq->lock = KEVQ_UNLOCKED; + evq->evq_base = efhw_iopages_ptr(&evq->hw.iobuff) + evq->hw.iobuff_off; + evq->evq_ptr = 0; + evq->evq_mask = (evq->hw.capacity * sizeof(efhw_event_t)) - 1u; + + EFHW_TRACE("%s: [%d] base=%p end=%p", __func__, evq->instance, + evq->evq_base, evq->evq_base + buf_bytes); + + return 0; +} + +void efhw_keventq_dtor(struct efhw_nic *nic, struct efhw_keventq *evq) +{ + EFHW_ASSERT(evq); + + EFHW_TRACE("%s: [%d]", __func__, evq->instance); + + /* Zero the timer-value for this queue. + And Tell NIC to stop using this event queue. */ + efhw_nic_event_queue_disable(nic, evq->instance, 0); + + /* free the pages used by the eventq itself */ + efhw_iopages_free(nic, &evq->hw.iobuff); +} + +void +efhw_handle_txdmaq_flushed(struct efhw_nic *nic, struct efhw_ev_handler *h, + efhw_event_t *evp) +{ + int instance = (int)FALCON_EVENT_TX_FLUSH_Q_ID(evp); + EFHW_TRACE("%s: instance=%d", __func__, instance); + + if (!h->dmaq_flushed_fn) { + EFHW_WARN("%s: no handler registered", __func__); + return; + } + + h->dmaq_flushed_fn(nic, instance, false); +} + +void +efhw_handle_rxdmaq_flushed(struct efhw_nic *nic, struct efhw_ev_handler *h, + efhw_event_t *evp) +{ + unsigned instance = (unsigned)FALCON_EVENT_RX_FLUSH_Q_ID(evp); + EFHW_TRACE("%s: instance=%d", __func__, instance); + + if (!h->dmaq_flushed_fn) { + EFHW_WARN("%s: no handler registered", __func__); + return; + } + + h->dmaq_flushed_fn(nic, instance, true); +} + +void +efhw_handle_wakeup_event(struct efhw_nic *nic, struct efhw_ev_handler *h, + efhw_event_t *evp) +{ + unsigned instance = (unsigned)FALCON_EVENT_WAKE_EVQ_ID(evp); + + if (!h->wakeup_fn) { + EFHW_WARN("%s: no handler registered", __func__); + return; + } + + h->wakeup_fn(nic, instance); +} + +void +efhw_handle_timeout_event(struct efhw_nic *nic, struct efhw_ev_handler *h, + efhw_event_t *evp) +{ + unsigned instance = (unsigned)FALCON_EVENT_WAKE_EVQ_ID(evp); + + if (!h->timeout_fn) { + EFHW_WARN("%s: no handler registered", __func__); + return; + } + + h->timeout_fn(nic, instance); +} + +/********************************************************************** + * Kernel event queue event handling. 
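+ *
+ * Poll-side locking: q->lock is cycled KEVQ_UNLOCKED -> KEVQ_LOCKED
+ * with cmpxchg; a second poller that finds the queue LOCKED marks it
+ * KEVQ_RECHECK and returns, and the lock holder then re-polls before
+ * dropping the lock, so no event is left unprocessed.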
+ */ + +int efhw_keventq_poll(struct efhw_nic *nic, struct efhw_keventq *q) +{ + efhw_event_t *ev; + int l, count = 0; + + EFHW_ASSERT(nic); + EFHW_ASSERT(q); + EFHW_ASSERT(q->ev_handlers); + + /* Acquire the lock, or mark the queue as needing re-checking. */ + for (;;) { + l = q->lock; + if (l == KEVQ_UNLOCKED) { + if ((int)cmpxchg(&q->lock, l, KEVQ_LOCKED) == l) + break; + } else if (l == KEVQ_LOCKED) { + if ((int)cmpxchg(&q->lock, l, KEVQ_RECHECK) == l) + return 0; + } else { /* already marked for re-checking */ + EFHW_ASSERT(l == KEVQ_RECHECK); + return 0; + } + } + + if (unlikely(EFHW_EVENT_OVERFLOW(q, q))) + goto overflow; + + ev = EFHW_EVENT_PTR(q, q, 0); + +#ifndef NDEBUG + if (!EFHW_IS_EVENT(ev)) + EFHW_TRACE("%s: %d NO EVENTS!", __func__, q->instance); +#endif + + for (;;) { + /* Convention for return codes for handlers is: + ** 0 - no error, event consumed + ** 1 - no error, event not consumed + ** -ve - error, event not consumed + */ + if (likely(EFHW_IS_EVENT(ev))) { + count++; + + switch (FALCON_EVENT_CODE(ev)) { + + case FALCON_EVENT_CODE_CHAR: + falcon_handle_char_event(nic, q->ev_handlers, + ev); + break; + + default: + EFHW_ERR("efhw_keventq_poll: [%d] UNEXPECTED " + "EVENT:"FALCON_EVENT_FMT, + q->instance, + FALCON_EVENT_PRI_ARG(*ev)); + } + + EFHW_CLEAR_EVENT(ev); + EFHW_EVENTQ_NEXT(q); + + ev = EFHW_EVENT_PTR(q, q, 0); + } else { + /* No events left. Release the lock (checking if we + * need to re-poll to avoid race). */ + l = q->lock; + if (l == KEVQ_LOCKED) { + if ((int)cmpxchg(&q->lock, l, KEVQ_UNLOCKED) + == l) { + EFHW_TRACE + ("efhw_keventq_poll: %d clean exit", + q->instance); + goto clean_exit; + } + } + + /* Potentially more work to do. */ + l = q->lock; + EFHW_ASSERT(l == KEVQ_RECHECK); + EFHW_TEST((int)cmpxchg(&q->lock, l, KEVQ_LOCKED) == l); + EFHW_TRACE("efhw_keventq_poll: %d re-poll required", + q->instance); + } + } + + /* shouldn't get here */ + EFHW_ASSERT(0); + +overflow: + /* ?? Oh dear. Should we poll everything that could have possibly + ** happened? Or merely cry out in anguish... + */ + EFHW_WARN("efhw_keventq_poll: %d ***** OVERFLOW nic %d *****", + q->instance, nic->index); + + q->lock = KEVQ_UNLOCKED; + return count; + +clean_exit: + /* Ack the processed events so that this event queue can potentially + raise interrupts again */ + falcon_nic_evq_ack(nic, q->instance, + (EFHW_EVENT_OFFSET(q, q, 0) / sizeof(efhw_event_t)), + false); + return count; +} --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/filter_resource.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/filter_resource.c @@ -0,0 +1,250 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains filters support. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "efrm_internal.h" + + +struct filter_resource_manager { + struct efrm_resource_manager rm; + struct kfifo *free_ids; +}; + +static struct filter_resource_manager *efrm_filter_manager; + + +void efrm_filter_resource_free(struct filter_resource *frs) +{ + struct efhw_nic *nic = frs->rs.rs_client->nic; + int id; + + EFRM_RESOURCE_ASSERT_VALID(&frs->rs, 1); + + EFRM_TRACE("%s: " EFRM_RESOURCE_FMT, __func__, + EFRM_RESOURCE_PRI_ARG(frs->rs.rs_handle)); + + efhw_nic_ipfilter_clear(nic, frs->filter_idx); + frs->filter_idx = -1; + efrm_vi_resource_release(frs->pt); + + /* Free this filter. */ + id = EFRM_RESOURCE_INSTANCE(frs->rs.rs_handle); + EFRM_VERIFY_EQ(kfifo_put(efrm_filter_manager->free_ids, + (unsigned char *)&id, sizeof(id)), + sizeof(id)); + + efrm_client_put(frs->rs.rs_client); + EFRM_DO_DEBUG(memset(frs, 0, sizeof(*frs))); + kfree(frs); +} +EXPORT_SYMBOL(efrm_filter_resource_free); + + +void efrm_filter_resource_release(struct filter_resource *frs) +{ + if (__efrm_resource_release(&frs->rs)) + efrm_filter_resource_free(frs); +} +EXPORT_SYMBOL(efrm_filter_resource_release); + + +static void filter_rm_dtor(struct efrm_resource_manager *rm) +{ + EFRM_TRACE("%s:", __func__); + + EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_filter_manager->rm); + EFRM_ASSERT(&efrm_filter_manager->rm == rm); + + kfifo_vfree(efrm_filter_manager->free_ids); + EFRM_TRACE("%s: done", __func__); +} + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ + +int efrm_create_filter_resource_manager(struct efrm_resource_manager **rm_out) +{ + int rc; + + EFRM_ASSERT(rm_out); + + efrm_filter_manager = + kmalloc(sizeof(struct filter_resource_manager), GFP_KERNEL); + if (efrm_filter_manager == 0) + return -ENOMEM; + memset(efrm_filter_manager, 0, sizeof(*efrm_filter_manager)); + + rc = efrm_resource_manager_ctor(&efrm_filter_manager->rm, + filter_rm_dtor, "FILTER", + EFRM_RESOURCE_FILTER); + if (rc < 0) + goto fail1; + + /* Create a pool of free instances */ + rc = efrm_kfifo_id_ctor(&efrm_filter_manager->free_ids, + 0, EFHW_IP_FILTER_NUM, + &efrm_filter_manager->rm.rm_lock); + if (rc != 0) + goto fail2; + + *rm_out = &efrm_filter_manager->rm; + EFRM_TRACE("%s: filter resources created - %d IDs", + __func__, kfifo_len(efrm_filter_manager->free_ids)); + return 0; + +fail2: + efrm_resource_manager_dtor(&efrm_filter_manager->rm); +fail1: + memset(efrm_filter_manager, 0, sizeof(*efrm_filter_manager)); + kfree(efrm_filter_manager); + return rc; + +} + + +int efrm_filter_resource_clear(struct filter_resource *frs) +{ + struct efhw_nic *nic = frs->rs.rs_client->nic; + + efhw_nic_ipfilter_clear(nic, frs->filter_idx); + frs->filter_idx = -1; + return 0; +} +EXPORT_SYMBOL(efrm_filter_resource_clear); + + +int 
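+/* Lifecycle sketch for a filter resource (error handling elided;
+ * "virs" is assumed to be a previously allocated VI resource):
+ *
+ *   struct filter_resource *frs;
+ *
+ *   efrm_filter_resource_alloc(virs, &frs);
+ *   __efrm_filter_resource_set(frs, type, saddr, sport, daddr, dport);
+ *   ...
+ *   efrm_filter_resource_clear(frs);
+ *   efrm_filter_resource_release(frs);
+ */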
+__efrm_filter_resource_set(struct filter_resource *frs, int type, + unsigned saddr, uint16_t sport, + unsigned daddr, uint16_t dport) +{ + struct efhw_nic *nic = frs->rs.rs_client->nic; + int vi_instance; + + EFRM_ASSERT(frs); + + if (efrm_nic_tablep->a_nic->devtype.variant >= 'B' && + (frs->pt->flags & EFHW_VI_JUMBO_EN) == 0) + type |= EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK; + vi_instance = EFRM_RESOURCE_INSTANCE(frs->pt->rs.rs_handle); + + return efhw_nic_ipfilter_set(nic, type, &frs->filter_idx, + vi_instance, saddr, sport, daddr, dport); +} +EXPORT_SYMBOL(__efrm_filter_resource_set);; + + +int +efrm_filter_resource_alloc(struct vi_resource *vi_parent, + struct filter_resource **frs_out) +{ + struct filter_resource *frs; + int rc, instance; + + EFRM_ASSERT(frs_out); + EFRM_ASSERT(efrm_filter_manager); + EFRM_RESOURCE_MANAGER_ASSERT_VALID(&efrm_filter_manager->rm); + EFRM_ASSERT(vi_parent != NULL); + EFRM_ASSERT(EFRM_RESOURCE_TYPE(vi_parent->rs.rs_handle) == + EFRM_RESOURCE_VI); + + /* Allocate resource data structure. */ + frs = kmalloc(sizeof(struct filter_resource), GFP_KERNEL); + if (!frs) + return -ENOMEM; + + /* Allocate an instance. */ + rc = kfifo_get(efrm_filter_manager->free_ids, + (unsigned char *)&instance, sizeof(instance)); + if (rc != sizeof(instance)) { + EFRM_TRACE("%s: out of instances", __func__); + EFRM_ASSERT(rc == 0); + rc = -EBUSY; + goto fail1; + } + + /* Initialise the resource DS. */ + efrm_resource_init(&frs->rs, EFRM_RESOURCE_FILTER, instance); + frs->pt = vi_parent; + efrm_resource_ref(&frs->pt->rs); + frs->filter_idx = -1; + + EFRM_TRACE("%s: " EFRM_RESOURCE_FMT " VI %d", __func__, + EFRM_RESOURCE_PRI_ARG(frs->rs.rs_handle), + EFRM_RESOURCE_INSTANCE(vi_parent->rs.rs_handle)); + + efrm_client_add_resource(vi_parent->rs.rs_client, &frs->rs); + *frs_out = frs; + return 0; + +fail1: + memset(frs, 0, sizeof(*frs)); + kfree(frs); + return rc; +} +EXPORT_SYMBOL(efrm_filter_resource_alloc); + + +int efrm_filter_resource_instance(struct filter_resource *frs) +{ + return EFRM_RESOURCE_INSTANCE(frs->rs.rs_handle); +} +EXPORT_SYMBOL(efrm_filter_resource_instance); + + +struct efrm_resource * +efrm_filter_resource_to_resource(struct filter_resource *frs) +{ + return &frs->rs; +} +EXPORT_SYMBOL(efrm_filter_resource_to_resource); + + +struct filter_resource * +efrm_filter_resource_from_resource(struct efrm_resource *rs) +{ + return filter_resource(rs); +} +EXPORT_SYMBOL(efrm_filter_resource_from_resource); --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/resource_driver.c +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/resource_driver.c @@ -0,0 +1,600 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains main driver entry points. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include "linux_resource_internal.h" +#include "kernel_compat.h" +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Solarflare Communications"); +MODULE_LICENSE("GPL"); + +static struct efhw_ev_handler ev_handler = { + .wakeup_fn = efrm_handle_wakeup_event, + .timeout_fn = efrm_handle_timeout_event, + .dmaq_flushed_fn = efrm_handle_dmaq_flushed, +}; + +const int max_hardware_init_repeats = 10; + +/*-------------------------------------------------------------------- + * + * Module load time variables + * + *--------------------------------------------------------------------*/ +/* See docs/notes/pci_alloc_consistent */ +static int do_irq = 1; /* enable interrupts */ + +#if defined(CONFIG_X86_XEN) +static int irq_moderation = 60; /* interrupt moderation (60 usec) */ +#else +static int irq_moderation = 20; /* interrupt moderation (20 usec) */ +#endif +static int nic_options = NIC_OPT_DEFAULT; +int efx_vi_eventq_size = EFX_VI_EVENTQ_SIZE_DEFAULT; + +module_param(do_irq, int, S_IRUGO); +MODULE_PARM_DESC(do_irq, "Enable interrupts. " + "Do not turn it off unless you know what are you doing."); +module_param(irq_moderation, int, S_IRUGO); +MODULE_PARM_DESC(irq_moderation, "IRQ moderation in usec"); +module_param(nic_options, int, S_IRUGO); +MODULE_PARM_DESC(nic_options, "Nic options -- see efhw_types.h"); +module_param(efx_vi_eventq_size, int, S_IRUGO); +MODULE_PARM_DESC(efx_vi_eventq_size, + "Size of event queue allocated by efx_vi library"); + +/*-------------------------------------------------------------------- + * + * Linux specific NIC initialisation + * + *--------------------------------------------------------------------*/ + +static inline irqreturn_t +linux_efrm_interrupt(int irr, void *dev_id) +{ + return efhw_nic_interrupt((struct efhw_nic *)dev_id); +} + +int linux_efrm_irq_ctor(struct linux_efhw_nic *lnic) +{ + struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic; + + nic->flags &= ~NIC_FLAG_MSI; + if (nic->flags & NIC_FLAG_TRY_MSI) { + int rc = pci_enable_msi(lnic->pci_dev); + if (rc < 0) { + EFRM_WARN("%s: Could not enable MSI (%d)", + __func__, rc); + EFRM_WARN("%s: Continuing with legacy interrupt mode", + __func__); + } else { + EFRM_NOTICE("%s: MSI enabled", __func__); + nic->flags |= NIC_FLAG_MSI; + } + } + + if (request_irq(lnic->pci_dev->irq, linux_efrm_interrupt, + IRQF_SHARED, "sfc_resource", nic)) { + EFRM_ERR("Request for interrupt #%d failed", + lnic->pci_dev->irq); + nic->flags &= ~NIC_FLAG_OS_IRQ_EN; + return -EBUSY; + } + nic->flags |= NIC_FLAG_OS_IRQ_EN; + + return 0; +} + +void linux_efrm_irq_dtor(struct linux_efhw_nic *lnic) +{ + EFRM_TRACE("%s: start", __func__); + + if (lnic->efrm_nic.efhw_nic.flags & NIC_FLAG_OS_IRQ_EN) { + free_irq(lnic->pci_dev->irq, &lnic->efrm_nic.efhw_nic); + lnic->efrm_nic.efhw_nic.flags &= ~NIC_FLAG_OS_IRQ_EN; + } + + if (lnic->efrm_nic.efhw_nic.flags & NIC_FLAG_MSI) { + pci_disable_msi(lnic->pci_dev); + lnic->efrm_nic.efhw_nic.flags &= ~NIC_FLAG_MSI; + } + 
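+	/* Teardown mirrors linux_efrm_irq_ctor() in reverse: the IRQ
+	 * handler is freed before MSI is disabled. */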
+ EFRM_TRACE("%s: done", __func__); +} + +/* Allocate buffer table entries for a particular NIC. + */ +static int efrm_nic_buffer_table_alloc(struct efhw_nic *nic) +{ + int capacity; + int page_order; + int rc; + + /* Choose queue size. */ + for (capacity = 8192; capacity <= nic->evq_sizes; capacity <<= 1) { + if (capacity > nic->evq_sizes) { + EFRM_ERR + ("%s: Unable to choose EVQ size (supported=%x)", + __func__, nic->evq_sizes); + return -E2BIG; + } else if (capacity & nic->evq_sizes) + break; + } + + nic->interrupting_evq.hw.capacity = capacity; + nic->interrupting_evq.hw.buf_tbl_alloc.base = (unsigned)-1; + + nic->non_interrupting_evq.hw.capacity = capacity; + nic->non_interrupting_evq.hw.buf_tbl_alloc.base = (unsigned)-1; + + /* allocate buffer table entries to map onto the iobuffer */ + page_order = get_order(capacity * sizeof(efhw_event_t)); + if (!(nic->flags & NIC_FLAG_NO_INTERRUPT)) { + rc = efrm_buffer_table_alloc(page_order, + &nic->interrupting_evq + .hw.buf_tbl_alloc); + if (rc < 0) { + EFRM_WARN + ("%s: failed (%d) to alloc %d buffer table entries", + __func__, rc, page_order); + return rc; + } + } + rc = efrm_buffer_table_alloc(page_order, + &nic->non_interrupting_evq.hw. + buf_tbl_alloc); + if (rc < 0) { + EFRM_WARN + ("%s: failed (%d) to alloc %d buffer table entries", + __func__, rc, page_order); + return rc; + } + + return 0; +} + +/* Free buffer table entries allocated for a particular NIC. + */ +static void efrm_nic_buffer_table_free(struct efhw_nic *nic) +{ + if (nic->interrupting_evq.hw.buf_tbl_alloc.base != (unsigned)-1) + efrm_buffer_table_free(&nic->interrupting_evq.hw + .buf_tbl_alloc); + if (nic->non_interrupting_evq.hw.buf_tbl_alloc.base != (unsigned)-1) + efrm_buffer_table_free(&nic->non_interrupting_evq + .hw.buf_tbl_alloc); +} + +static int iomap_bar(struct linux_efhw_nic *lnic, size_t len) +{ + volatile char __iomem *ioaddr; + + ioaddr = ioremap_nocache(lnic->ctr_ap_pci_addr, len); + if (ioaddr == 0) + return -ENOMEM; + + lnic->efrm_nic.efhw_nic.bar_ioaddr = ioaddr; + return 0; +} + +static int linux_efhw_nic_map_ctr_ap(struct linux_efhw_nic *lnic) +{ + struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic; + int rc; + + rc = iomap_bar(lnic, nic->ctr_ap_bytes); + + /* Bug 5195: workaround for now. */ + if (rc != 0 && nic->ctr_ap_bytes > 16 * 1024 * 1024) { + /* Try half the size for now. */ + nic->ctr_ap_bytes /= 2; + EFRM_WARN("Bug 5195 WORKAROUND: retrying iomap of %d bytes", + nic->ctr_ap_bytes); + rc = iomap_bar(lnic, nic->ctr_ap_bytes); + } + + if (rc < 0) { + EFRM_ERR("Failed (%d) to map bar (%d bytes)", + rc, nic->ctr_ap_bytes); + return rc; + } + + return rc; +} + +int +linux_efrm_nic_ctor(struct linux_efhw_nic *lnic, struct pci_dev *dev, + spinlock_t *reg_lock, + unsigned nic_flags, unsigned nic_options) +{ + struct efhw_device_type dev_type; + struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic; + u8 class_revision; + int rc; + + rc = pci_read_config_byte(dev, PCI_CLASS_REVISION, &class_revision); + if (rc != 0) { + EFRM_ERR("%s: pci_read_config_byte failed (%d)", + __func__, rc); + return rc; + } + + if (!efhw_device_type_init(&dev_type, dev->vendor, dev->device, + class_revision)) { + EFRM_ERR("%s: efhw_device_type_init failed %04x:%04x(%d)", + __func__, (unsigned) dev->vendor, + (unsigned) dev->device, (int) class_revision); + return -ENODEV; + } + + EFRM_NOTICE("attaching device type %04x:%04x %d:%c%d", + (unsigned) dev->vendor, (unsigned) dev->device, + dev_type.arch, dev_type.variant, dev_type.revision); + + /* Initialise the adapter-structure. 
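Per ci/efhw/nic.h, efhw_nic_init() fills in software state only
+	   (flags, options, device type); the hardware itself is brought up
+	   later, in efrm_nic_add().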
*/
+	efhw_nic_init(nic, nic_flags, nic_options, dev_type);
+	lnic->pci_dev = dev;
+
+	rc = pci_enable_device(dev);
+	if (rc < 0) {
+		EFRM_ERR("%s: pci_enable_device failed (%d)",
+			 __func__, rc);
+		return rc;
+	}
+
+	lnic->ctr_ap_pci_addr = pci_resource_start(dev, nic->ctr_ap_bar);
+
+	if (!pci_dma_supported(dev, (dma_addr_t)EFHW_DMA_ADDRMASK)) {
+		EFRM_ERR("%s: pci_dma_supported(%lx) failed", __func__,
+			 (unsigned long)EFHW_DMA_ADDRMASK);
+		return -ENODEV;
+	}
+
+	if (pci_set_dma_mask(dev, (dma_addr_t)EFHW_DMA_ADDRMASK)) {
+		EFRM_ERR("%s: pci_set_dma_mask(%lx) failed", __func__,
+			 (unsigned long)EFHW_DMA_ADDRMASK);
+		return -ENODEV;
+	}
+
+	if (pci_set_consistent_dma_mask(dev, (dma_addr_t)EFHW_DMA_ADDRMASK)) {
+		EFRM_ERR("%s: pci_set_consistent_dma_mask(%lx) failed",
+			 __func__, (unsigned long)EFHW_DMA_ADDRMASK);
+		return -ENODEV;
+	}
+
+	rc = linux_efhw_nic_map_ctr_ap(lnic);
+	if (rc < 0)
+		return rc;
+
+	/* By default struct efhw_nic contains its own lock for protecting
+	 * access to nic registers. We override it with a pointer to the
+	 * lock in the net driver. This is needed when resource and net
+	 * drivers share a single PCI function (falcon B series).
+	 */
+	nic->reg_lock = reg_lock;
+	return 0;
+}
+
+void linux_efrm_nic_dtor(struct linux_efhw_nic *lnic)
+{
+	struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic;
+	volatile char __iomem *bar_ioaddr = nic->bar_ioaddr;
+
+	efhw_nic_dtor(nic);
+
+	/* Unmap the bar. */
+	EFRM_ASSERT(bar_ioaddr);
+	iounmap(bar_ioaddr);
+	nic->bar_ioaddr = 0;
+}
+
+/****************************************************************************
+ *
+ * efrm_tasklet - used to poll the eventq which may result in further callbacks
+ *
+ ****************************************************************************/
+
+static void efrm_tasklet(unsigned long pdev)
+{
+	struct efhw_nic *nic = (struct efhw_nic *)pdev;
+
+	EFRM_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT));
+
+	efhw_keventq_poll(nic, &nic->interrupting_evq);
+	EFRM_TRACE("%s: complete", __func__);
+}
+
+/****************************************************************************
+ *
+ * char driver specific interrupt callbacks -- run at hard IRQL
+ *
+ ****************************************************************************/
+static void efrm_handle_eventq_irq(struct efhw_nic *nic, int evq)
+{
+	/* NB. The interrupt must have already been acked (for legacy mode). */
+
+	EFRM_TRACE("%s: starting tasklet", __func__);
+	EFRM_ASSERT(!(nic->flags & NIC_FLAG_NO_INTERRUPT));
+
+	tasklet_schedule(&linux_efhw_nic(nic)->tasklet);
+}
+
+/* A count of how many NICs this driver knows about. */
+static int n_nics_probed;
+
+/****************************************************************************
+ *
+ * efrm_nic_add: add the NIC to the resource driver
+ *
+ * NOTE: the flow of control through this routine is quite subtle
+ * because of the number of operations that can fail. We therefore
+ * take the approach of keeping the return code (rc) variable
+ * accurate, and only do operations while it is non-negative. Tear down
+ * is done at the end if rc is negative, depending on what has been set up
+ * by that point.
+ *
+ * So basically just make sure that any code you add checks rc>=0 before
+ * doing any work and you'll be fine.
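+ *
+ * A sketch of the pattern (illustrative names only, not literal code
+ * from this function):
+ *
+ *     rc = do_step();
+ *     if (rc < 0)
+ *             goto failed;
+ *     step_done = 1;
+ *     ...
+ *   failed:
+ *     if (step_done)
+ *             undo_step();
+ *     return rc;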
+ *
+ ****************************************************************************/
+int
+efrm_nic_add(struct pci_dev *dev, unsigned flags, const uint8_t *mac_addr,
+	     struct linux_efhw_nic **lnic_out, spinlock_t *reg_lock,
+	     int bt_min, int bt_lim, int non_irq_evq,
+	     const struct vi_resource_dimensions *res_dim)
+{
+	struct linux_efhw_nic *lnic = NULL;
+	struct efhw_nic *nic = NULL;
+	int count = 0, rc = 0, resources_init = 0;
+	int constructed = 0;
+	int registered_nic = 0;
+	int buffers_allocated = 0;
+	static unsigned nic_index; /* = 0; */
+
+	EFRM_TRACE("%s: device detected (Slot '%s', IRQ %d)", __func__,
+		   pci_name(dev) ? pci_name(dev) : "?", dev->irq);
+
+	/* Ensure that we have room for the new adapter-structure. */
+	if (efrm_nic_tablep->nic_count == EFHW_MAX_NR_DEVS) {
+		EFRM_WARN("%s: WARNING: too many devices", __func__);
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	if (n_nics_probed == 0) {
+		rc = efrm_resources_init(res_dim, bt_min, bt_lim);
+		if (rc != 0)
+			goto failed;
+		resources_init = 1;
+	}
+
+	/* Allocate memory for the new adapter-structure. */
+	lnic = kmalloc(sizeof(*lnic), GFP_KERNEL);
+	if (lnic == NULL) {
+		EFRM_ERR("%s: ERROR: failed to allocate memory", __func__);
+		rc = -ENOMEM;
+		goto failed;
+	}
+	memset(lnic, 0, sizeof(*lnic));
+	nic = &lnic->efrm_nic.efhw_nic;
+
+	lnic->ev_handlers = &ev_handler;
+
+	/* OS specific hardware mappings */
+	rc = linux_efrm_nic_ctor(lnic, dev, reg_lock, flags, nic_options);
+	if (rc < 0) {
+		EFRM_ERR("%s: ERROR: initialisation failed", __func__);
+		goto failed;
+	}
+
+	constructed = 1;
+
+	/* Tell the driver about the NIC - this needs to be done before the
+	   resource managers get created below. Note we haven't initialised
+	   the hardware yet, and I don't like doing this before the perhaps
+	   unreliable hardware initialisation. However, there's quite a lot
+	   of code to review if we wanted to do hardware init before bringing
+	   up the resource managers. */
+	rc = efrm_driver_register_nic(&lnic->efrm_nic, nic_index,
+				      /* TODO: ifindex */ nic_index);
+	if (rc < 0) {
+		EFRM_ERR("%s: cannot register nic %d (error code %d)",
+			 __func__, efrm_nic_tablep->nic_count, rc);
+		goto failed;
+	}
+	++nic_index;
+	registered_nic = 1;
+
+	rc = efrm_nic_buffer_table_alloc(nic);
+	if (rc < 0)
+		goto failed;
+	buffers_allocated = 1;
+
+	/****************************************************/
+	/* hardware bringup */
+	/****************************************************/
+	/* Detecting hardware can be a slightly unreliable process;
+	   we want to make sure that we maximise our chances, so we
+	   loop a few times until all is good. */
+	for (count = 0; count < max_hardware_init_repeats; count++) {
+		rc = efhw_nic_init_hardware(nic, &ev_handler, mac_addr,
+					    non_irq_evq);
+		if (rc >= 0)
+			break;
+
+		/* pain */
+		EFRM_ERR
+		    ("error - hardware initialisation failed code %d, "
+		     "attempt %d of %d", rc, count + 1,
+		     max_hardware_init_repeats);
+	}
+	if (rc < 0)
+		goto failed;
+
+	tasklet_init(&lnic->tasklet, efrm_tasklet, (ulong)nic);
+
+	/* set up interrupt handlers (hard-irq) */
+	nic->irq_handler = &efrm_handle_eventq_irq;
+
+	/* this device can now take management interrupts */
+	if (do_irq && !(nic->flags & NIC_FLAG_NO_INTERRUPT)) {
+		rc = linux_efrm_irq_ctor(lnic);
+		if (rc < 0) {
+			EFRM_ERR("Interrupt initialisation failed (%d)", rc);
+			goto failed;
+		}
+		efhw_nic_set_interrupt_moderation(nic, -1, irq_moderation);
+		efhw_nic_interrupt_enable(nic);
+	}
+	EFRM_TRACE("interrupts are %sregistered", do_irq ?
"" : "not "); + + *lnic_out = lnic; + EFRM_ASSERT(rc == 0); + ++n_nics_probed; + return 0; + +failed: + if (buffers_allocated) + efrm_nic_buffer_table_free(nic); + if (registered_nic) + efrm_driver_unregister_nic(&lnic->efrm_nic); + if (constructed) + linux_efrm_nic_dtor(lnic); + kfree(lnic); /* safe in any case */ + if (resources_init) + efrm_resources_fini(); + return rc; +} + +/**************************************************************************** + * + * efrm_nic_del: Remove the nic from the resource driver structures + * + ****************************************************************************/ +void efrm_nic_del(struct linux_efhw_nic *lnic) +{ + struct efhw_nic *nic = &lnic->efrm_nic.efhw_nic; + + EFRM_TRACE("%s:", __func__); + EFRM_ASSERT(nic); + + efrm_nic_buffer_table_free(nic); + + efrm_driver_unregister_nic(&lnic->efrm_nic); + + /* + * Synchronise here with any running ISR. + * Remove the OS handler. There should be no IRQs being generated + * by our NIC at this point. + */ + if (efhw_nic_have_functional_units(nic)) { + efhw_nic_close_interrupts(nic); + linux_efrm_irq_dtor(lnic); + tasklet_kill(&lnic->tasklet); + } + + /* Close down hardware and free resources. */ + linux_efrm_nic_dtor(lnic); + kfree(lnic); + + if (--n_nics_probed == 0) + efrm_resources_fini(); + + EFRM_TRACE("%s: done", __func__); +} + +/**************************************************************************** + * + * init_module: register as a PCI driver. + * + ****************************************************************************/ +static int init_sfc_resource(void) +{ + int rc = 0; + + EFRM_TRACE("%s: RESOURCE driver starting", __func__); + + efrm_driver_ctor(); + + /* Register the driver so that our 'probe' function is called for + * each EtherFabric device in the system. + */ + rc = efrm_driverlink_register(); + if (rc == -ENODEV) + EFRM_ERR("%s: no devices found", __func__); + if (rc < 0) + goto failed_driverlink; + + if (efrm_install_proc_entries() != 0) { + /* Do not fail, but print a warning */ + EFRM_WARN("%s: WARNING: failed to install /proc entries", + __func__); + } + + return 0; + +failed_driverlink: + efrm_driver_dtor(); + return rc; +} + +/**************************************************************************** + * + * cleanup_module: module-removal entry-point + * + ****************************************************************************/ +static void cleanup_sfc_resource(void) +{ + efrm_uninstall_proc_entries(); + + efrm_driverlink_unregister(); + + /* Clean up char-driver specific initialisation. 
+ - driver dtor can use both work queue and buffer table entries */ + efrm_driver_dtor(); + + EFRM_TRACE("%s: unloaded", __func__); +} + +module_init(init_sfc_resource); +module_exit(cleanup_sfc_resource); --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/Makefile +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/Makefile @@ -0,0 +1,14 @@ +obj-$(CONFIG_SFC_RESOURCE) := sfc_resource.o + +EXTRA_CFLAGS += -D__CI_HARDWARE_CONFIG_FALCON__ +EXTRA_CFLAGS += -D__ci_driver__ +EXTRA_CFLAGS += -Werror +EXTRA_CFLAGS += -Idrivers/net/sfc -Idrivers/net/sfc/sfc_resource + +sfc_resource-objs := resource_driver.o iopage.o efx_vi_shm.o \ + driverlink_new.o kernel_proc.o kfifo.o \ + nic.o eventq.o falcon.o falcon_hash.o \ + assert_valid.o buddy.o buffer_table.o filter_resource.o \ + iobufset_resource.o resource_manager.o resources.o \ + vi_resource_alloc.o vi_resource_event.o vi_resource_flush.o \ + vi_resource_manager.o driver_object.o kernel_compat.o --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/linux_resource_internal.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/linux_resource_internal.h @@ -0,0 +1,76 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains Linux-specific API internal for the resource driver. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __LINUX_RESOURCE_INTERNAL__ +#define __LINUX_RESOURCE_INTERNAL__ + +#include +#include +#include +#include + + +/*! Linux specific EtherFabric initialisation */ +extern int +linux_efrm_nic_ctor(struct linux_efhw_nic *, struct pci_dev *, + spinlock_t *reg_lock, + unsigned nic_flags, unsigned nic_options); + +/*! Linux specific EtherFabric initialisation */ +extern void linux_efrm_nic_dtor(struct linux_efhw_nic *); + +/*! Linux specific EtherFabric initialisation -- interrupt registration */ +extern int linux_efrm_irq_ctor(struct linux_efhw_nic *); + +/*! 
Linux specific EtherFabric initialisation -- interrupt deregistration */ +extern void linux_efrm_irq_dtor(struct linux_efhw_nic *); + +extern int efrm_driverlink_register(void); +extern void efrm_driverlink_unregister(void); + +extern int +efrm_nic_add(struct pci_dev *dev, unsigned int opts, const uint8_t *mac_addr, + struct linux_efhw_nic **lnic_out, spinlock_t *reg_lock, + int bt_min, int bt_max, int non_irq_evq, + const struct vi_resource_dimensions *); +extern void efrm_nic_del(struct linux_efhw_nic *); + + +extern int efrm_install_proc_entries(void); +extern void efrm_uninstall_proc_entries(void); + +#endif /* __LINUX_RESOURCE_INTERNAL__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/nic.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/nic.h @@ -0,0 +1,62 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains API provided by efhw/nic.c file. This file is not + * designed for use outside of the SFC resource driver. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_NIC_H__ +#define __CI_EFHW_NIC_H__ + +#include +#include + + +/* Convert PCI info to device type. Returns false when device is not + * recognised. + */ +extern int efhw_device_type_init(struct efhw_device_type *dt, + int vendor_id, int device_id, int revision); + +/* Initialise fields that do not involve touching hardware. */ +extern void efhw_nic_init(struct efhw_nic *nic, unsigned flags, + unsigned options, struct efhw_device_type dev_type); + +/*! Destruct NIC resources */ +extern void efhw_nic_dtor(struct efhw_nic *nic); + +/*! Shutdown interrupts */ +extern void efhw_nic_close_interrupts(struct efhw_nic *nic); + +#endif /* __CI_EFHW_NIC_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/public.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/public.h @@ -0,0 +1,104 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides public API of efhw library exported from the SFC + * resource driver. 
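+ * It mixes generic helpers (efhw_nic_have_functional_units(),
+ * efhw_nic_have_hw(), the eventq iobuffer allocator) with
+ * Falcon-specific filter search-limit entry points (falcon_nic_*).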
+ * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_PUBLIC_H__ +#define __CI_EFHW_PUBLIC_H__ + +#include +#include + +/*! Returns true if we have some EtherFabric functional units - + whether configured or not */ +static inline int efhw_nic_have_functional_units(struct efhw_nic *nic) +{ + return nic->efhw_func != 0; +} + +/*! Returns true if the EtherFabric functional units have been configured */ +static inline int efhw_nic_have_hw(struct efhw_nic *nic) +{ + return efhw_nic_have_functional_units(nic) && (EFHW_KVA(nic) != 0); +} + +/*! Helper function to allocate the iobuffer needed by an eventq + * - it ensures the eventq has the correct alignment for the NIC + * + * \param rm Event-queue resource manager + * \param instance Event-queue instance (index) + * \param buf_bytes Requested size of eventq + * \return < 0 if iobuffer allocation fails + */ +int efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic, + struct eventq_resource_hardware *h, + int evq_instance, unsigned buf_bytes); + +extern void falcon_nic_set_rx_usr_buf_size(struct efhw_nic *, + int rx_usr_buf_size); + +/*! Get RX filter search limits from RX_FILTER_CTL_REG. + * use_raw_values = 0 to get actual depth of search, or 1 to get raw values + * from register. + */ +extern void +falcon_nic_get_rx_filter_search_limits(struct efhw_nic *nic, + struct efhw_filter_search_limits *lim, + int use_raw_values); + +/*! Set RX filter search limits in RX_FILTER_CTL_REG. + * use_raw_values = 0 if specifying actual depth of search, or 1 if specifying + * raw values to write to the register. + */ +extern void +falcon_nic_set_rx_filter_search_limits(struct efhw_nic *nic, + struct efhw_filter_search_limits *lim, + int use_raw_values); + + +/*! Legacy RX IP filter search depth control interface */ +extern void +falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full, + uint32_t tcp_wild, + uint32_t udp_full, uint32_t udp_wild); + +/*! 
Legacy RX IP filter search depth control interface */ +extern void +falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full, + uint32_t *tcp_wild, + uint32_t *udp_full, uint32_t *udp_wild); + +#endif /* __CI_EFHW_PUBLIC_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/efhw_types.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/efhw_types.h @@ -0,0 +1,382 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides struct efhw_nic and some related types. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_EFAB_TYPES_H__ +#define __CI_EFHW_EFAB_TYPES_H__ + +#include +#include +#include +#include + +/*-------------------------------------------------------------------- + * + * forward type declarations + * + *--------------------------------------------------------------------*/ + +struct efhw_nic; + +/*-------------------------------------------------------------------- + * + * Managed interface + * + *--------------------------------------------------------------------*/ + +struct efhw_buffer_table_allocation{ + unsigned base; + unsigned order; +}; + +struct eventq_resource_hardware { + /*!iobuffer allocated for eventq - can be larger than eventq */ + struct efhw_iopages iobuff; + unsigned iobuff_off; + struct efhw_buffer_table_allocation buf_tbl_alloc; + int capacity; /*!< capacity of event queue */ +}; + +/*-------------------------------------------------------------------- + * + * event queues and event driven callbacks + * + *--------------------------------------------------------------------*/ + +struct efhw_keventq { + int lock; + caddr_t evq_base; + int32_t evq_ptr; + uint32_t evq_mask; + unsigned instance; + struct eventq_resource_hardware hw; + struct efhw_ev_handler *ev_handlers; +}; + +/*-------------------------------------------------------------------- + * + * filters + * + *--------------------------------------------------------------------*/ + +struct efhw_filter_spec { + uint dmaq_id; + uint32_t saddr_le32; + uint32_t daddr_le32; + uint16_t sport_le16; + uint16_t dport_le16; + unsigned tcp : 1; + unsigned full : 1; + unsigned rss : 1; /* not supported on A1 */ + unsigned scatter : 1; /* not supported on A1 */ +}; + +struct 
efhw_filter_depth {
+	unsigned needed;
+	unsigned max;
+};
+
+struct efhw_filter_search_limits {
+	unsigned tcp_full;
+	unsigned tcp_wild;
+	unsigned udp_full;
+	unsigned udp_wild;
+};
+
+
+/**********************************************************************
+ * Portable HW interface. ***************************************
+ **********************************************************************/
+
+/*--------------------------------------------------------------------
+ *
+ * EtherFabric Functional units - configuration and control
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_func_ops {
+
+	/*-------------- Initialisation ------------ */
+
+	/*! close down all hardware functional units - leaves NIC in a safe
+	   state for driver unload */
+	void (*close_hardware) (struct efhw_nic *nic);
+
+	/*! initialise all hardware functional units */
+	int (*init_hardware) (struct efhw_nic *nic,
+			      struct efhw_ev_handler *,
+			      const uint8_t *mac_addr, int non_irq_evq);
+
+	/*-------------- Interrupt support ------------ */
+
+	/*! Main interrupt routine
+	 ** This function returns:
+	 ** - zero, if the IRQ was not generated by EF1
+	 ** - non-zero, if EF1 was the source of the IRQ
+	 **
+	 **
+	 ** opaque is an OS provided pointer for use by the OS callbacks
+	 ** e.g. in Windows used to indicate DPC scheduled
+	 */
+	int (*interrupt) (struct efhw_nic *nic);
+
+	/*! Enable the interrupt */
+	void (*interrupt_enable) (struct efhw_nic *nic);
+
+	/*! Disable the interrupt */
+	void (*interrupt_disable) (struct efhw_nic *nic);
+
+	/*! Set interrupt moderation strategy for the given IRQ unit
+	 ** val is in usec
+	 */
+	void (*set_interrupt_moderation)(struct efhw_nic *nic, int evq,
+					 uint val);
+
+	/*-------------- Event support ------------ */
+
+	/*! Enable the given event queue
+	   depending on the underlying implementation (EF1 or Falcon) then
+	   either a q_base_addr in host memory, or a buffer base id should
+	   be provided
+	 */
+	void (*event_queue_enable) (struct efhw_nic *nic,
+				    uint evq, /* event queue index */
+				    uint evq_size, /* units of #entries */
+				    dma_addr_t q_base_addr, uint buf_base_id,
+				    int interrupting);
+
+	/*! Disable the given event queue (and any associated timer) */
+	void (*event_queue_disable) (struct efhw_nic *nic, uint evq,
+				     int timer_only);
+
+	/*! request wakeup from the NIC on a given event Q */
+	void (*wakeup_request) (struct efhw_nic *nic, dma_addr_t q_base_addr,
+				int next_i, int evq);
+
+	/*! Push a SW event on a given eventQ */
+	void (*sw_event) (struct efhw_nic *nic, int data, int evq);
+
+	/*-------------- IP Filter API ------------ */
+
+	/*! Setup a given filter - The software can request a filter_i,
+	 * but some EtherFabric implementations will override with
+	 * a more suitable index
+	 */
+	int (*ipfilter_set) (struct efhw_nic *nic, int type,
+			     int *filter_i, int dmaq,
+			     unsigned saddr_be32, unsigned sport_be16,
+			     unsigned daddr_be32, unsigned dport_be16);
+
+	/*! Clear down a given filter */
+	void (*ipfilter_clear) (struct efhw_nic *nic, int filter_idx);
+
+	/*-------------- DMA support ------------ */
+
+	/*! Initialise NIC state for a given TX DMAQ */
+	void (*dmaq_tx_q_init) (struct efhw_nic *nic,
+				uint dmaq, uint evq, uint owner, uint tag,
+				uint dmaq_size, uint buf_idx, uint flags);
+
+	/*! Initialise NIC state for a given RX DMAQ */
+	void (*dmaq_rx_q_init) (struct efhw_nic *nic,
+				uint dmaq, uint evq, uint owner, uint tag,
+				uint dmaq_size, uint buf_idx, uint flags);
+
+	/*!
Disable a given TX DMAQ */ + void (*dmaq_tx_q_disable) (struct efhw_nic *nic, uint dmaq); + + /*! Disable a given RX DMAQ */ + void (*dmaq_rx_q_disable) (struct efhw_nic *nic, uint dmaq); + + /*! Flush a given TX DMA channel */ + int (*flush_tx_dma_channel) (struct efhw_nic *nic, uint dmaq); + + /*! Flush a given RX DMA channel */ + int (*flush_rx_dma_channel) (struct efhw_nic *nic, uint dmaq); + + /*-------------- Buffer table Support ------------ */ + + /*! Initialise a buffer table page */ + void (*buffer_table_set) (struct efhw_nic *nic, + dma_addr_t dma_addr, + uint bufsz, uint region, + int own_id, int buffer_id); + + /*! Initialise a block of buffer table pages */ + void (*buffer_table_set_n) (struct efhw_nic *nic, int buffer_id, + dma_addr_t dma_addr, + uint bufsz, uint region, + int n_pages, int own_id); + + /*! Clear a block of buffer table pages */ + void (*buffer_table_clear) (struct efhw_nic *nic, int buffer_id, + int num); + + /*! Commit a buffer table update */ + void (*buffer_table_commit) (struct efhw_nic *nic); + + /*-------------- New filter API ------------ */ + + /*! Set a given filter */ + int (*filter_set) (struct efhw_nic *nic, struct efhw_filter_spec *spec, + int *filter_idx_out); + + /*! Clear a given filter */ + void (*filter_clear) (struct efhw_nic *nic, int filter_idx); +}; + + +/*---------------------------------------------------------------------------- + * + * NIC type + * + *---------------------------------------------------------------------------*/ + +struct efhw_device_type { + int arch; /* enum efhw_arch */ + char variant; /* 'A', 'B', ... */ + int revision; /* 0, 1, ... */ +}; + + +/*---------------------------------------------------------------------------- + * + * EtherFabric NIC instance - nic.c for HW independent functions + * + *---------------------------------------------------------------------------*/ + +/*! */ +struct efhw_nic { + /*! zero base index in efrm_nic_tablep->nic array */ + int index; + int ifindex; /*!< OS level nic index */ + struct net *nd_net; + + struct efhw_device_type devtype; + + /*! Options that can be set by user. */ + unsigned options; +# define NIC_OPT_EFTEST 0x1 /* owner is an eftest app */ + +# define NIC_OPT_DEFAULT 0 + + /*! Internal flags that indicate hardware properties at runtime. */ + unsigned flags; +# define NIC_FLAG_NO_INTERRUPT 0x01 /* to be set at init time only */ +# define NIC_FLAG_TRY_MSI 0x02 +# define NIC_FLAG_MSI 0x04 +# define NIC_FLAG_OS_IRQ_EN 0x08 + + unsigned mtu; /*!< MAC MTU (includes MAC hdr) */ + + /* hardware resources */ + + /*! I/O address of the start of the bar */ + volatile char __iomem *bar_ioaddr; + + /*! Bar number of control aperture. */ + unsigned ctr_ap_bar; + /*! Length of control aperture in bytes. */ + unsigned ctr_ap_bytes; + + uint8_t mac_addr[ETH_ALEN]; /*!< mac address */ + + /*! EtherFabric Functional Units -- functions */ + const struct efhw_func_ops *efhw_func; + + /*! This lock protects a number of misc NIC resources. It should + * only be used for things that can be at the bottom of the lock + * order. ie. You mustn't attempt to grab any other lock while + * holding this one. + */ + spinlock_t *reg_lock; + spinlock_t the_reg_lock; + + int buf_commit_outstanding; /*!< outstanding buffer commits */ + + /*! interrupt callbacks (hard-irq) */ + void (*irq_handler) (struct efhw_nic *, int unit); + + /*! event queues per driver */ + struct efhw_keventq interrupting_evq; + +/* for marking when we are not using an IRQ unit + - 0 is a valid offset to an IRQ unit on EF1! 
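An out-of-range all-ones value is used as the sentinel instead.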
*/ +#define EFHW_IRQ_UNIT_UNUSED 0xffff + /*! interrupt unit in use for the interrupting event queue */ + unsigned int irq_unit; + + struct efhw_keventq non_interrupting_evq; + + struct efhw_iopage irq_iobuff; /*!< Falcon SYSERR interrupt */ + + /* The new driverlink infrastructure. */ + struct efx_dl_device *net_driver_dev; + struct efx_dlfilt_cb_s *dlfilter_cb; + + /*! Bit masks of the sizes of event queues and dma queues supported + * by the nic. */ + unsigned evq_sizes; + unsigned rxq_sizes; + unsigned txq_sizes; + + /* Size of filter table. */ + unsigned ip_filter_tbl_size; + + /* Number of filters currently used */ + unsigned ip_filter_tbl_used; + + /* Dynamically allocated filter state. */ + uint8_t *filter_in_use; + struct efhw_filter_spec *filter_spec_cache; + + /* Currently required and maximum filter table search depths. */ + struct efhw_filter_depth tcp_full_srch; + struct efhw_filter_depth tcp_wild_srch; + struct efhw_filter_depth udp_full_srch; + struct efhw_filter_depth udp_wild_srch; + + /* Number of event queues, DMA queues and timers. */ + unsigned num_evqs; + unsigned num_dmaqs; + unsigned num_timers; +}; + + +#define EFHW_KVA(nic) ((nic)->bar_ioaddr) + + +#endif /* __CI_EFHW_EFHW_TYPES_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/hardware_sysdep.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/hardware_sysdep.h @@ -0,0 +1,69 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides version-independent Linux kernel API for header files + * with hardware-related definitions (in ci/driver/efab/hardware*). + * Only kernels >=2.6.9 are supported. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_HARDWARE_LINUX_H__ +#define __CI_EFHW_HARDWARE_LINUX_H__ + +#include + +#if defined(__LITTLE_ENDIAN) +#define EFHW_IS_LITTLE_ENDIAN +#elif defined(__BIG_ENDIAN) +#define EFHW_IS_BIG_ENDIAN +#else +#error Unknown endianness +#endif + +#ifndef readq +static inline uint64_t __readq(volatile void __iomem *addr) +{ + return *(volatile uint64_t *)addr; +} +#define readq(x) __readq(x) +#endif + +#ifndef writeq +static inline void __writeq(uint64_t v, volatile void __iomem *addr) +{ + *(volatile uint64_t *)addr = v; +} +#define writeq(val, addr) __writeq((val), (addr)) +#endif + +#endif /* __CI_EFHW_HARDWARE_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/eventq_macros.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/eventq_macros.h @@ -0,0 +1,77 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides some event-related macros. This file is designed for + * use from kernel and from the userland contexts. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_EVENTQ_MACROS_H__ +#define __CI_EFHW_EVENTQ_MACROS_H__ + +#include + +/*-------------------------------------------------------------------- + * + * Event Queue manipulation + * + *--------------------------------------------------------------------*/ + +#define EFHW_EVENT_OFFSET(q, s, i) \ + (((s)->evq_ptr - (i) * (int32_t)sizeof(efhw_event_t)) \ + & (q)->evq_mask) + +#define EFHW_EVENT_PTR(q, s, i) \ + ((efhw_event_t *)((q)->evq_base + EFHW_EVENT_OFFSET(q, s, i))) + +#define EFHW_EVENTQ_NEXT(s) \ + do { ((s)->evq_ptr += sizeof(efhw_event_t)); } while (0) + +#define EFHW_EVENTQ_PREV(s) \ + do { ((s)->evq_ptr -= sizeof(efhw_event_t)); } while (0) + +/* Be worried about this on byteswapped machines */ +/* Due to crazy chipsets, we see the event words being written in +** arbitrary order (bug4539). So test for presence of event must ensure +** that both halves have changed from the null. 
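+** (The null event is all-ones, hence the macros below compare both
+** 32-bit halves against (uint32_t)-1.)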
+*/ +#define EFHW_IS_EVENT(evp) \ + (((evp)->opaque.a != (uint32_t)-1) && \ + ((evp)->opaque.b != (uint32_t)-1)) +#define EFHW_CLEAR_EVENT(evp) ((evp)->u64 = (uint64_t)-1) +#define EFHW_CLEAR_EVENT_VALUE 0xff + +#define EFHW_EVENT_OVERFLOW(evq, s) \ + (EFHW_IS_EVENT(EFHW_EVENT_PTR(evq, s, 1))) + +#endif /* __CI_EFHW_EVENTQ_MACROS_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/efhw_config.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/efhw_config.h @@ -0,0 +1,43 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides some limits used in both kernel and userland code. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_EFAB_CONFIG_H__ +#define __CI_EFHW_EFAB_CONFIG_H__ + +#define EFHW_MAX_NR_DEVS 5 /* max number of efhw devices supported */ + +#endif /* __CI_EFHW_EFAB_CONFIG_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/eventq.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/eventq.h @@ -0,0 +1,72 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains API provided by efhw/eventq.c file. This file is not + * designed for use outside of the SFC resource driver. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_EVENTQ_H__ +#define __CI_EFHW_EVENTQ_H__ + +#include +#include + +/*! Poll the event queue. */ +extern int efhw_keventq_poll(struct efhw_nic *, struct efhw_keventq *); + +/*! Callbacks for handling events. */ +struct efhw_ev_handler { + void (*wakeup_fn)(struct efhw_nic *nic, unsigned); + void (*timeout_fn)(struct efhw_nic *nic, unsigned); + void (*dmaq_flushed_fn) (struct efhw_nic *, unsigned, int); +}; + +extern int efhw_keventq_ctor(struct efhw_nic *, int instance, + struct efhw_keventq *, struct efhw_ev_handler *); +extern void efhw_keventq_dtor(struct efhw_nic *, struct efhw_keventq *); + +extern void efhw_handle_txdmaq_flushed(struct efhw_nic *, + struct efhw_ev_handler *, + efhw_event_t *); +extern void efhw_handle_rxdmaq_flushed(struct efhw_nic *, + struct efhw_ev_handler *, + efhw_event_t *); +extern void efhw_handle_wakeup_event(struct efhw_nic *, + struct efhw_ev_handler *, + efhw_event_t *); +extern void efhw_handle_timeout_event(struct efhw_nic *, + struct efhw_ev_handler *, + efhw_event_t *); + +#endif /* __CI_EFHW_EVENTQ_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/iopage_types.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/iopage_types.h @@ -0,0 +1,190 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides struct efhw_page and struct efhw_iopage for Linux + * kernel. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_IOPAGE_LINUX_H__ +#define __CI_EFHW_IOPAGE_LINUX_H__ + +#include +#include +#include +#include + +/*-------------------------------------------------------------------- + * + * struct efhw_page: A single page of memory. Directly mapped in the + * driver, and can be mapped to userlevel. 
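+ * Unlike struct efhw_iopage below, it carries no DMA mapping, so the
+ * NIC cannot address it.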
+ * + *--------------------------------------------------------------------*/ + +struct efhw_page { + unsigned long kva; +}; + +static inline int efhw_page_alloc(struct efhw_page *p) +{ + p->kva = __get_free_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL); + return p->kva ? 0 : -ENOMEM; +} + +static inline int efhw_page_alloc_zeroed(struct efhw_page *p) +{ + p->kva = get_zeroed_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL); + return p->kva ? 0 : -ENOMEM; +} + +static inline void efhw_page_free(struct efhw_page *p) +{ + free_page(p->kva); + EFHW_DO_DEBUG(memset(p, 0, sizeof(*p))); +} + +static inline char *efhw_page_ptr(struct efhw_page *p) +{ + return (char *)p->kva; +} + +static inline unsigned efhw_page_pfn(struct efhw_page *p) +{ + return (unsigned)(__pa(p->kva) >> PAGE_SHIFT); +} + +static inline void efhw_page_mark_invalid(struct efhw_page *p) +{ + p->kva = 0; +} + +static inline int efhw_page_is_valid(struct efhw_page *p) +{ + return p->kva != 0; +} + +static inline void efhw_page_init_from_va(struct efhw_page *p, void *va) +{ + p->kva = (unsigned long)va; +} + +/*-------------------------------------------------------------------- + * + * struct efhw_iopage: A single page of memory. Directly mapped in the driver, + * and can be mapped to userlevel. Can also be accessed by the NIC. + * + *--------------------------------------------------------------------*/ + +struct efhw_iopage { + struct efhw_page p; + dma_addr_t dma_addr; +}; + +static inline dma_addr_t efhw_iopage_dma_addr(struct efhw_iopage *p) +{ + return p->dma_addr; +} + +#define efhw_iopage_ptr(iop) efhw_page_ptr(&(iop)->p) +#define efhw_iopage_pfn(iop) efhw_page_pfn(&(iop)->p) +#define efhw_iopage_mark_invalid(iop) efhw_page_mark_invalid(&(iop)->p) +#define efhw_iopage_is_valid(iop) efhw_page_is_valid(&(iop)->p) + +/*-------------------------------------------------------------------- + * + * struct efhw_iopages: A set of pages that are contiguous in physical + * memory. Directly mapped in the driver, and can be mapped to userlevel. + * Can also be accessed by the NIC. + * + * NB. The O/S may be unwilling to allocate many, or even any of these. So + * only use this type where the NIC really needs a physically contiguous + * buffer. + * + *--------------------------------------------------------------------*/ + +struct efhw_iopages { + caddr_t kva; + unsigned order; + dma_addr_t dma_addr; +}; + +static inline caddr_t efhw_iopages_ptr(struct efhw_iopages *p) +{ + return p->kva; +} + +static inline unsigned efhw_iopages_pfn(struct efhw_iopages *p) +{ + return (unsigned)(__pa(p->kva) >> PAGE_SHIFT); +} + +static inline dma_addr_t efhw_iopages_dma_addr(struct efhw_iopages *p) +{ + return p->dma_addr; +} + +static inline unsigned efhw_iopages_size(struct efhw_iopages *p) +{ + return 1u << (p->order + PAGE_SHIFT); +} + +/* struct efhw_iopage <-> struct efhw_iopages conversions for handling + * physically contiguous allocations in iobufsets for iSCSI. This allows + * the essential information about contiguous allocations from + * efhw_iopages_alloc() to be saved away in the struct efhw_iopage array in + * an iobufset. (Changing the iobufset resource to use a union type would + * involve a lot of code changes, and make the iobufset's metadata larger + * which could be bad as it's supposed to fit into a single page on some + * platforms.) 
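+ * Each conversion simply recomputes the kva and DMA address of one
+ * PAGE_SIZE slice of the contiguous block.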
+ */ +static inline void +efhw_iopage_init_from_iopages(struct efhw_iopage *iopage, + struct efhw_iopages *iopages, unsigned pageno) +{ + iopage->p.kva = ((unsigned long)efhw_iopages_ptr(iopages)) + + (pageno * PAGE_SIZE); + iopage->dma_addr = efhw_iopages_dma_addr(iopages) + + (pageno * PAGE_SIZE); +} + +static inline void +efhw_iopages_init_from_iopage(struct efhw_iopages *iopages, + struct efhw_iopage *iopage, unsigned order) +{ + iopages->kva = (caddr_t) efhw_iopage_ptr(iopage); + EFHW_ASSERT(iopages->kva); + iopages->order = order; + iopages->dma_addr = efhw_iopage_dma_addr(iopage); +} + +#endif /* __CI_EFHW_IOPAGE_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/checks.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/checks.h @@ -0,0 +1,118 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides helpers to turn bit shifts into dword shifts and + * check that the bit fields haven't overflown the dword etc. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_CHECK_H__ +#define __CI_EFHW_CHECK_H__ + +/*---------------------------------------------------------------------------- + * + * Helpers to turn bit shifts into dword shifts and check that the bit fields + * haven't overflown the dword etc. Aim is to preserve consistency with the + * autogenerated headers - once stable we could hard code. 
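+ *
+ * Typical use is a pair of assertions beside a register-field write,
+ * e.g. (a sketch with made-up field names):
+ *
+ *     __DWCHCK(FOO_LBN, FOO_WIDTH);
+ *     __RANGECHCK(value, FOO_WIDTH);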
+ *
+ *---------------------------------------------------------------------------*/
+
+/* mask constructors */
+#define __FALCON_MASK(WIDTH, T) ((((T)1) << (WIDTH)) - 1)
+#define __FALCON_MASK32(WIDTH) __FALCON_MASK((WIDTH), uint32_t)
+#define __FALCON_MASK64(WIDTH) __FALCON_MASK((WIDTH), uint64_t)
+
+#define __FALCON_MASKFIELD32(LBN, WIDTH) \
+	((uint32_t)(__FALCON_MASK32(WIDTH) << (LBN)))
+
+/* constructors for fields which span the first and second dwords */
+#define __LW(LBN) (32 - LBN)
+#define __LOW(v, LBN, WIDTH) \
+	((uint32_t)(((v) & __FALCON_MASK64(__LW((LBN)))) << (LBN)))
+#define __HIGH(v, LBN, WIDTH) \
+	((uint32_t)(((v) >> __LW((LBN))) & \
+		    __FALCON_MASK64((WIDTH - __LW((LBN))))))
+/* constructors for fields within the second dword */
+#define __DW2(LBN) ((LBN) - 32)
+
+/* constructors for fields which span the second and third dwords */
+#define __LW2(LBN) (64 - LBN)
+#define __LOW2(v, LBN, WIDTH) \
+	((uint32_t)(((v) & __FALCON_MASK64(__LW2((LBN)))) << ((LBN) - 32)))
+#define __HIGH2(v, LBN, WIDTH) \
+	((uint32_t)(((v) >> __LW2((LBN))) & \
+		    __FALCON_MASK64((WIDTH - __LW2((LBN))))))
+
+/* constructors for fields within the third dword */
+#define __DW3(LBN) ((LBN) - 64)
+
+/* constructors for fields which span the third and fourth dwords */
+#define __LW3(LBN) (96 - LBN)
+#define __LOW3(v, LBN, WIDTH) \
+	((uint32_t)(((v) & __FALCON_MASK64(__LW3((LBN)))) << ((LBN) - 64)))
+#define __HIGH3(v, LBN, WIDTH) \
+	((uint32_t)(((v) >> __LW3((LBN))) & \
+		    __FALCON_MASK64((WIDTH - __LW3((LBN))))))
+
+/* constructors for fields within the fourth dword */
+#define __DW4(LBN) ((LBN) - 96)
+
+/* checks that the autogenerated headers are consistent with our model */
+#define __WIDTHCHCK(a, b) EFHW_ASSERT((a) == (b))
+#define __RANGECHCK(v, WIDTH) \
+	EFHW_ASSERT(((uint64_t)(v) & ~(__FALCON_MASK64((WIDTH)))) == 0)
+
+/* fields within the first dword */
+#define __DWCHCK(LBN, WIDTH) \
+	EFHW_ASSERT(((LBN) >= 0) && (((LBN)+(WIDTH)) <= 32))
+
+/* fields which span the first and second dwords */
+#define __LWCHK(LBN, WIDTH) EFHW_ASSERT(WIDTH >= __LW(LBN))
+
+/* fields within the second dword */
+#define __DW2CHCK(LBN, WIDTH) \
+	EFHW_ASSERT(((LBN) >= 32) && (((LBN)+(WIDTH)) <= 64))
+
+/* fields which span the second and third dwords */
+#define __LW2CHK(LBN, WIDTH) EFHW_ASSERT(WIDTH >= __LW2(LBN))
+
+/* fields within the third dword */
+#define __DW3CHCK(LBN, WIDTH) \
+	EFHW_ASSERT(((LBN) >= 64) && (((LBN)+(WIDTH)) <= 96))
+
+/* fields which span the third and fourth dwords */
+#define __LW3CHK(LBN, WIDTH) EFHW_ASSERT(WIDTH >= __LW3(LBN))
+
+/* fields within the fourth dword */
+#define __DW4CHCK(LBN, WIDTH) \
+	EFHW_ASSERT(((LBN) >= 96) && (((LBN)+(WIDTH)) <= 128))
+
+/* fields in the first qword */
+#define __QWCHCK(LBN, WIDTH) \
+	EFHW_ASSERT(((LBN) >= 0) && (((LBN)+(WIDTH)) <= 64))
+
+#endif /* __CI_EFHW_CHECK_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/iopage.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/iopage.h
@@ -0,0 +1,58 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains OS-independent API for allocating iopage types.
+ * The implementation of these functions is highly OS-dependent.
+ * This file is not designed for use outside of the SFC resource driver.
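+ * (On Linux the implementation comes from iopage.o, listed in this
+ * driver's Makefile.)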
+ * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_DRIVER_RESOURCE_IOPAGE_H__ +#define __CI_DRIVER_RESOURCE_IOPAGE_H__ + +#include + +/*-------------------------------------------------------------------- + * + * memory allocation + * + *--------------------------------------------------------------------*/ + +extern int efhw_iopage_alloc(struct efhw_nic *, struct efhw_iopage *p); +extern void efhw_iopage_free(struct efhw_nic *, struct efhw_iopage *p); + +extern int efhw_iopages_alloc(struct efhw_nic *, struct efhw_iopages *p, + unsigned order); +extern void efhw_iopages_free(struct efhw_nic *, struct efhw_iopages *p); + +#endif /* __CI_DRIVER_RESOURCE_IOPAGE_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/falcon.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/falcon.h @@ -0,0 +1,94 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains API provided by efhw/falcon.c file. This file is not + * designed for use outside of the SFC resource driver. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/falcon.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/falcon.h
@@ -0,0 +1,94 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains the API provided by the efhw/falcon.c file.  This file
+ * is not designed for use outside of the SFC resource driver.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_FALCON_H__
+#define __CI_EFHW_FALCON_H__
+
+#include
+#include
+
+/*----------------------------------------------------------------------------
+ *
+ * Locks - unfortunately required
+ *
+ *---------------------------------------------------------------------------*/
+
+#define FALCON_LOCK_DECL irq_flags_t lock_state
+#define FALCON_LOCK_LOCK(nic) \
+	spin_lock_irqsave((nic)->reg_lock, lock_state)
+#define FALCON_LOCK_UNLOCK(nic) \
+	spin_unlock_irqrestore((nic)->reg_lock, lock_state)
+
+extern struct efhw_func_ops falcon_char_functional_units;
+
+/*! specify a pace value for a TX DMA Queue */
+extern void falcon_nic_pace(struct efhw_nic *nic, uint dmaq, uint pace);
+
+/*! configure the pace engine */
+extern void falcon_nic_pace_cfg(struct efhw_nic *nic, int fb_base,
+				int bin_thresh);
+
+/*! confirm buffer table updates - should be used for items where
+   loss of data would be unacceptable.  E.g. for the buffers that back
+   an event or DMA queue */
+extern void falcon_nic_buffer_table_confirm(struct efhw_nic *nic);
+
+/*! Reset all the TX DMA queue pointers. */
+extern void falcon_clobber_tx_dma_ptrs(struct efhw_nic *nic, uint dmaq);
+
+extern int
+falcon_handle_char_event(struct efhw_nic *nic,
+			 struct efhw_ev_handler *h, efhw_event_t *evp);
+
+/*! Acknowledge to HW that processing is complete on a given event queue */
+extern void falcon_nic_evq_ack(struct efhw_nic *nic, uint evq, /* evq id */
+			       uint rptr, /* new read pointer update */
+			       bool wakeup /* request a wakeup event if
+					      ptr's != */
+			       );
+
+extern void
+falcon_nic_buffer_table_set_n(struct efhw_nic *nic, int buffer_id,
+			      dma_addr_t dma_addr, uint bufsz, uint region,
+			      int n_pages, int own_id);
+
+extern int falcon_nic_filter_ctor(struct efhw_nic *nic);
+
+extern void falcon_nic_filter_dtor(struct efhw_nic *nic);
+
+#endif /* __CI_EFHW_FALCON_H__ */
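The lock macros above are designed to be used as a matched trio inside one scope; an editor's sketch of the intended pattern:

static void example_locked_register_access(struct efhw_nic *nic)
{
	FALCON_LOCK_DECL;	/* declares the saved-flags variable */

	FALCON_LOCK_LOCK(nic);	/* spin_lock_irqsave on nic->reg_lock */
	/* ... access registers that require serialisation ... */
	FALCON_LOCK_UNLOCK(nic);	/* spin_unlock_irqrestore */
}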
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/common_sysdep.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/common_sysdep.h
@@ -0,0 +1,61 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides a version-independent Linux kernel API for
+ * userland-to-kernel interfaces.
+ * Only kernels >=2.6.9 are supported.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_COMMON_LINUX_H__
+#define __CI_EFHW_COMMON_LINUX_H__
+
+#include
+
+/* Dirty hack, but Linux kernel does not provide DMA_ADDR_T_FMT */
+#if BITS_PER_LONG == 64 || defined(CONFIG_HIGHMEM64G)
+#define DMA_ADDR_T_FMT "%llx"
+#else
+#define DMA_ADDR_T_FMT "%x"
+#endif
+
+/* Linux kernel also does not provide PRIx32...  Sigh. */
+#define PRIx32 "x"
+
+#ifdef __ia64__
+# define PRIx64 "lx"
+#else
+# define PRIx64 "llx"
+#endif
+
+#endif /* __CI_EFHW_COMMON_LINUX_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/sysdep.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/sysdep.h
@@ -0,0 +1,55 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides a version-independent Linux kernel API for the efhw
+ * library.
+ * Only kernels >=2.6.9 are supported.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_SYSDEP_LINUX_H__
+#define __CI_EFHW_SYSDEP_LINUX_H__
+
+#include
+#include
+#include
+#include
+#include
+
+#include /* necessary for etherdevice.h on some kernels */
+#include
+
+typedef unsigned long irq_flags_t;
+
+#define spin_lock_destroy(l_) do {} while (0)
+
+#endif /* __CI_EFHW_SYSDEP_LINUX_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/falcon_hash.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/falcon_hash.h
@@ -0,0 +1,58 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains the API provided by the efhw/falcon_hash.c file.
+ * Functions declared in this file are not exported from the Linux
+ * sfc_resource driver.
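Stepping back to common_sysdep.h above, an editor's sketch of why DMA_ADDR_T_FMT exists: dma_addr_t is 32 or 64 bits wide depending on the kernel configuration, so a hard-coded printk format would warn on one configuration or the other:

static void example_log_dma_addr(dma_addr_t addr)
{
	/* DMA_ADDR_T_FMT expands to "%llx" or "%x" to match the
	 * configured width of dma_addr_t. */
	printk(KERN_DEBUG "buffer mapped at " DMA_ADDR_T_FMT "\n", addr);
}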
+ * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_FALCON_HASH_H__ +#define __CI_EFHW_FALCON_HASH_H__ + +extern unsigned int +falcon_hash_get_ip_key(unsigned int src_ip, unsigned int src_port, + unsigned int dest_ip, unsigned int dest_port, + int tcp, int full); + +extern unsigned int +falcon_hash_function1(unsigned int key, unsigned int nfilters); + +extern unsigned int +falcon_hash_function2(unsigned int key, unsigned int nfilters); + +extern unsigned int +falcon_hash_iterator(unsigned int hash1, unsigned int hash2, + unsigned int n_search, unsigned int nfilters); + +#endif /* __CI_EFHW_FALCON_HASH_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/debug.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/debug.h @@ -0,0 +1,84 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides debug-related API for efhw library using Linux kernel + * primitives. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_DEBUG_LINUX_H__ +#define __CI_EFHW_DEBUG_LINUX_H__ + +#define EFHW_PRINTK_PREFIX "[sfc efhw] " + +#define EFHW_PRINTK(level, fmt, ...) 
\
+	printk(level EFHW_PRINTK_PREFIX fmt "\n", __VA_ARGS__)
+
+/* Following macros should be used with non-zero format parameters
+ * due to __VA_ARGS__ limitations.  Use "%s" with __func__ if you can't
+ * find better parameters. */
+#define EFHW_ERR(fmt, ...) EFHW_PRINTK(KERN_ERR, fmt, __VA_ARGS__)
+#define EFHW_WARN(fmt, ...) EFHW_PRINTK(KERN_WARNING, fmt, __VA_ARGS__)
+#define EFHW_NOTICE(fmt, ...) EFHW_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__)
+#if 0 && !defined(NDEBUG)
+#define EFHW_TRACE(fmt, ...) EFHW_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__)
+#else
+#define EFHW_TRACE(fmt, ...)
+#endif
+
+#ifndef NDEBUG
+#define EFHW_ASSERT(cond) BUG_ON((cond) == 0)
+#define EFHW_DO_DEBUG(expr) expr
+#else
+#define EFHW_ASSERT(cond)
+#define EFHW_DO_DEBUG(expr)
+#endif
+
+#define EFHW_TEST(expr) \
+	do { \
+		if (unlikely(!(expr))) \
+			BUG(); \
+	} while (0)
+
+/* Build time asserts. We paste the line number into the type name
+ * so that the macro can be used more than once per file even if the
+ * compiler objects to multiple identical typedefs.  Collisions
+ * between uses in different header files are still possible. */
+#ifndef EFHW_BUILD_ASSERT
+#define __EFHW_BUILD_ASSERT_NAME(_x) __EFHW_BUILD_ASSERT_ILOATHECPP(_x)
+#define __EFHW_BUILD_ASSERT_ILOATHECPP(_x) __EFHW_BUILD_ASSERT__ ##_x
+#define EFHW_BUILD_ASSERT(e) \
+	typedef char __EFHW_BUILD_ASSERT_NAME(__LINE__)[(e) ? 1 : -1]
+#endif
+
+#endif /* __CI_EFHW_DEBUG_LINUX_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efhw/common.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efhw/common.h
@@ -0,0 +1,97 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the API of the efhw library, which may be used both
+ * from the kernel and from user-space code.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_COMMON_H__
+#define __CI_EFHW_COMMON_H__
+
+#include
+
+enum efhw_arch {
+	EFHW_ARCH_FALCON,
+};
+
+typedef uint32_t efhw_buffer_addr_t;
+#define EFHW_BUFFER_ADDR_FMT "[ba:%"PRIx32"]"
+
+/*! An event from the hardware event queue: 64 bits wide, also
+ * accessible as two opaque 32-bit halves. */
+typedef union {
+	uint64_t u64;
+	struct {
+		uint32_t a;
+		uint32_t b;
+	} opaque;
+} efhw_event_t;
+
+/* Flags for TX/RX queues */
+#define EFHW_VI_JUMBO_EN 0x01 /*! scatter RX over multiple desc */
+#define EFHW_VI_ISCSI_RX_HDIG_EN 0x02 /*! iscsi rx header digest */
+#define EFHW_VI_ISCSI_TX_HDIG_EN 0x04 /*! iscsi tx header digest */
+#define EFHW_VI_ISCSI_RX_DDIG_EN 0x08 /*! iscsi rx data digest */
+#define EFHW_VI_ISCSI_TX_DDIG_EN 0x10 /*! iscsi tx data digest */
+#define EFHW_VI_TX_PHYS_ADDR_EN 0x20 /*! TX physical address mode */
+#define EFHW_VI_RX_PHYS_ADDR_EN 0x40 /*! RX physical address mode */
+#define EFHW_VI_RM_WITH_INTERRUPT 0x80 /*! VI with an interrupt */
+#define EFHW_VI_TX_IP_CSUM_DIS 0x100 /*! disable IP checksum generation */
+#define EFHW_VI_TX_TCPUDP_CSUM_DIS 0x200 /*! disable tcp/udp checksum
+					     generation */
+#define EFHW_VI_TX_TCPUDP_ONLY 0x400 /*! drop non-tcp/udp packets */
+
+/* Types of hardware filter */
+/* Each of these values implicitly selects scatter filters on B0 - OR in
+   EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK if a non-scatter filter is required */
+#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD (0)	/* dest host only */
+#define EFHW_IP_FILTER_TYPE_UDP_FULL (1)	/* dest host and port */
+#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD (2)	/* dest based filter */
+#define EFHW_IP_FILTER_TYPE_TCP_FULL (3)	/* src  filter */
+/* Same again, but with RSS (for B0 only) */
+#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD_RSS_B0 (4)
+#define EFHW_IP_FILTER_TYPE_UDP_FULL_RSS_B0 (5)
+#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD_RSS_B0 (6)
+#define EFHW_IP_FILTER_TYPE_TCP_FULL_RSS_B0 (7)
+
+#define EFHW_IP_FILTER_TYPE_FULL_MASK (0x1) /* Mask for full / wildcard */
+#define EFHW_IP_FILTER_TYPE_TCP_MASK (0x2) /* Mask for TCP type */
+#define EFHW_IP_FILTER_TYPE_RSS_B0_MASK (0x4) /* Mask for B0 RSS enable */
+#define EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK (0x8) /* Mask for B0 SCATTER dsbl */
+
+#define EFHW_IP_FILTER_TYPE_MASK (0xffff) /* Mask of types above */
+
+#define EFHW_IP_FILTER_BROADCAST (0x10000) /* driverlink filter
+					      support */
+
+#endif /* __CI_EFHW_COMMON_H__ */
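An editor's sketch of how the filter-type values and masks above compose: a type is a small bitfield, so a B0 non-scatter TCP full-match filter is the base type with the no-scatter mask OR-ed in:

static unsigned example_tcp_full_noscatter(void)
{
	unsigned type = EFHW_IP_FILTER_TYPE_TCP_FULL |
			EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK;

	/* The component bits can be recovered with the masks. */
	BUG_ON(!(type & EFHW_IP_FILTER_TYPE_TCP_MASK));		/* TCP */
	BUG_ON(!(type & EFHW_IP_FILTER_TYPE_FULL_MASK));	/* full match */
	return type;
}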
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_manager.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_manager.h
@@ -0,0 +1,155 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains type definitions for VI resource.  These types
+ * may be used outside of the SFC resource driver, but such use is not
+ * recommended.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_DRIVER_EFAB_VI_RESOURCE_MANAGER_H__
+#define __CI_DRIVER_EFAB_VI_RESOURCE_MANAGER_H__
+
+#include
+#include
+
+
+#define EFRM_VI_RM_DMA_QUEUE_COUNT 2
+#define EFRM_VI_RM_DMA_QUEUE_TX    0
+#define EFRM_VI_RM_DMA_QUEUE_RX    1
+
+/** Bit numbers which may be set in the evq_state member of
+ * vi_resource_evq_info. */
+enum {
+	/** This bit is set if a wakeup has been requested on the NIC. */
+	VI_RESOURCE_EVQ_STATE_WAKEUP_PENDING,
+	/** This bit is set if the wakeup is valid for the sleeping
+	 * process. */
+	VI_RESOURCE_EVQ_STATE_CALLBACK_REGISTERED,
+	/** This bit is set if a wakeup or timeout event is currently being
+	 * processed. */
+	VI_RESOURCE_EVQ_STATE_BUSY,
+};
+#define VI_RESOURCE_EVQ_STATE(X) \
+	(((int32_t)1) << (VI_RESOURCE_EVQ_STATE_##X))
+
+
+/*! Global information for the VI resource manager. */
+struct vi_resource_manager {
+	struct efrm_resource_manager rm;
+
+	struct kfifo *instances_with_timer;
+	int with_timer_base;
+	int with_timer_limit;
+	struct kfifo *instances_with_interrupt;
+	int with_interrupt_base;
+	int with_interrupt_limit;
+
+	bool iscsi_dmaq_instance_is_free;
+
+	/* We keep VI resources which need flushing on these lists.  The VI
+	 * is put on the outstanding list when the flush request is issued
+	 * to the hardware and removed when the flush event arrives.  The
+	 * hardware can only handle a limited number of RX flush requests at
+	 * once, so VIs are placed in the waiting list until the flush can
+	 * be issued.  Flushes can be requested by the client or internally
+	 * by the VI resource manager.  In the former case, the reference
+	 * count must be non-zero for the duration of the flush and in the
+	 * latter case, the reference count must be zero. */
+	struct list_head rx_flush_waiting_list;
+	struct list_head rx_flush_outstanding_list;
+	struct list_head tx_flush_outstanding_list;
+	int rx_flush_outstanding_count;
+
+	/* once the flush has happened we push the close into the work queue
+	 * so it's OK on Windows to free the resources (Bug 3469).  Resources
+	 * on this list have zero reference count.
+	 */
+	struct list_head close_pending;
+	struct work_struct work_item;
+	struct workqueue_struct *workqueue;
+};
+
+struct vi_resource_nic_info {
+	struct eventq_resource_hardware evq_pages;
+	struct efhw_iopages dmaq_pages[EFRM_VI_RM_DMA_QUEUE_COUNT];
+};
+
+struct vi_resource {
+	/* Some macros make the assumption that the struct efrm_resource is
+	 * the first member of a struct vi_resource. */
+	struct efrm_resource rs;
+	atomic_t evq_refs;	/*!< Number of users of the event queue.
*/ + + uint32_t bar_mmap_bytes; + uint32_t mem_mmap_bytes; + + int32_t evq_capacity; + int32_t dmaq_capacity[EFRM_VI_RM_DMA_QUEUE_COUNT]; + + uint8_t dmaq_tag[EFRM_VI_RM_DMA_QUEUE_COUNT]; + uint16_t flags; + + /* we keep PT endpoints that have been destroyed on a list + * until we have seen their TX and RX DMAQs flush complete + * (see Bug 1217) + */ + struct list_head rx_flush_link; + struct list_head tx_flush_link; + int rx_flushing; + int rx_flush_outstanding; + int tx_flushing; + uint64_t flush_time; + int flush_count; + + void (*flush_callback_fn)(void *); + void *flush_callback_arg; + + void (*evq_callback_fn) (void *arg, int is_timeout, + struct efhw_nic *nic); + void *evq_callback_arg; + + struct vi_resource *evq_virs; /*!< EVQ for DMA queues */ + + struct efhw_buffer_table_allocation + dmaq_buf_tbl_alloc[EFRM_VI_RM_DMA_QUEUE_COUNT]; + + struct vi_resource_nic_info nic_info; +}; + +#undef vi_resource +#define vi_resource(rs1) container_of((rs1), struct vi_resource, rs) + +static inline dma_addr_t +efrm_eventq_dma_addr(struct vi_resource *virs) +{ + struct eventq_resource_hardware *hw; + hw = &virs->nic_info.evq_pages; + return efhw_iopages_dma_addr(&hw->iobuff) + hw->iobuff_off; +} + +#endif /* __CI_DRIVER_EFAB_VI_RESOURCE_MANAGER_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/efrm_client.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/efrm_client.h @@ -0,0 +1,32 @@ +#ifndef __EFRM_CLIENT_H__ +#define __EFRM_CLIENT_H__ + + +struct efrm_client; + + +struct efrm_client_callbacks { + /* Called before device is reset. Callee may block. */ + void (*pre_reset)(struct efrm_client *, void *user_data); + void (*stop)(struct efrm_client *, void *user_data); + void (*restart)(struct efrm_client *, void *user_data); +}; + + +#define EFRM_IFINDEX_DEFAULT -1 + + +/* NB. Callbacks may be invoked even before this returns. */ +extern int efrm_client_get(int ifindex, struct efrm_client_callbacks *, + void *user_data, struct efrm_client **client_out); +extern void efrm_client_put(struct efrm_client *); + +extern struct efhw_nic *efrm_client_get_nic(struct efrm_client *); + +#if 0 +/* For each resource type... */ +extern void efrm_x_resource_resume(struct x_resource *); +#endif + + +#endif /* __EFRM_CLIENT_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/iobufset.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/iobufset.h @@ -0,0 +1,110 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides public API for iobufset resource. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
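A usage sketch for the efrm_client API above (editor's illustration; the callback bodies are placeholders):

static void example_pre_reset(struct efrm_client *client, void *user_data)
{
	/* quiesce our use of the NIC before it is reset; may block */
}

static void example_stop(struct efrm_client *client, void *user_data) { }
static void example_restart(struct efrm_client *client, void *user_data) { }

static struct efrm_client_callbacks example_callbacks = {
	.pre_reset = example_pre_reset,
	.stop = example_stop,
	.restart = example_restart,
};

static int example_open_client(int ifindex, struct efrm_client **client_out)
{
	/* NB. the callbacks may fire even before this call returns */
	return efrm_client_get(ifindex, &example_callbacks, NULL, client_out);
}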
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_IOBUFSET_H__
+#define __CI_EFRM_IOBUFSET_H__
+
+#include
+
+/*! Iobufset resource structure.
+ * Users should not access the structure fields directly, but use the API
+ * below.
+ * However, this structure should not be moved out of public headers,
+ * because part of the API (e.g. the efrm_iobufset_dma_addr function) is
+ * inline and is used in the fast-path code.
+ */
+struct iobufset_resource {
+	struct efrm_resource rs;
+	struct vi_resource *evq;
+	struct iobufset_resource *linked;
+	struct efhw_buffer_table_allocation buf_tbl_alloc;
+	unsigned int n_bufs;
+	unsigned int pages_per_contiguous_chunk;
+	unsigned chunk_order;
+	struct efhw_iopage bufs[1];
+	/*!< up to n_bufs can follow this, so this must be the last member */
};

+#define iobufset_resource(rs1) \
+	container_of((rs1), struct iobufset_resource, rs)
+
+/*!
+ * Allocate iobufset resource.
+ *
+ * \param vi        VI that "owns" these buffers.  Grabs a reference
+ *                  on success.
+ * \param linked    Uses memory from an existing iobufset.  Grabs a
+ *                  reference on success.
+ * \param iobrs_out pointer to return the new iobufset resource
+ *
+ * \return          status code; if non-zero, iobrs_out is unchanged
+ */
+extern int
+efrm_iobufset_resource_alloc(int32_t n_pages,
+			     int32_t pages_per_contiguous_chunk,
+			     struct vi_resource *vi,
+			     struct iobufset_resource *linked,
+			     bool phys_addr_mode,
+			     struct iobufset_resource **iobrs_out);
+
+extern void efrm_iobufset_resource_free(struct iobufset_resource *);
+extern void efrm_iobufset_resource_release(struct iobufset_resource *);
+
+static inline char *
+efrm_iobufset_ptr(struct iobufset_resource *rs, unsigned offs)
+{
+	EFRM_ASSERT(offs < (unsigned)(rs->n_bufs << PAGE_SHIFT));
+	return efhw_iopage_ptr(&rs->bufs[offs >> PAGE_SHIFT]) +
+		(offs & (PAGE_SIZE - 1));
+}
+
+static inline char *efrm_iobufset_page_ptr(struct iobufset_resource *rs,
+					   unsigned page_i)
+{
+	EFRM_ASSERT(page_i < (unsigned)rs->n_bufs);
+	return efhw_iopage_ptr(&rs->bufs[page_i]);
+}
+
+static inline dma_addr_t
+efrm_iobufset_dma_addr(struct iobufset_resource *rs, unsigned offs)
+{
+	EFRM_ASSERT(offs < (unsigned)(rs->n_bufs << PAGE_SHIFT));
+	return efhw_iopage_dma_addr(&rs->bufs[offs >> PAGE_SHIFT]) +
+		(offs & (PAGE_SIZE - 1));
+}
+
+#endif /* __CI_EFRM_IOBUFSET_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_private.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource_private.h
@@ -0,0 +1,65 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains the private API for the VI resource.  The API is not
+ * designed to be used outside of the SFC resource driver.
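An editor's sketch of the page arithmetic done by the inline iobufset accessors above: a byte offset into the set yields both a kernel-virtual pointer and the device-visible DMA address of the same byte:

static void example_touch_iobufset(struct iobufset_resource *rs,
				   unsigned offs, char value)
{
	char *va = efrm_iobufset_ptr(rs, offs);		/* CPU view */
	dma_addr_t da = efrm_iobufset_dma_addr(rs, offs); /* NIC view */

	*va = value;	/* write through the kernel mapping */
	(void)da;	/* 'da' is what would go into a DMA descriptor */
}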
+ * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFRM_VI_RESOURCE_PRIVATE_H__ +#define __CI_EFRM_VI_RESOURCE_PRIVATE_H__ + +#include +#include + +extern struct vi_resource_manager *efrm_vi_manager; + +/*************************************************************************/ + +extern void efrm_vi_rm_delayed_free(struct work_struct *data); + +extern void efrm_vi_rm_salvage_flushed_vis(void); + +void efrm_vi_rm_free_flushed_resource(struct vi_resource *virs); + +void efrm_vi_rm_init_dmaq(struct vi_resource *virs, int queue_index, + struct efhw_nic *nic); + +/*! Wakeup handler */ +extern void efrm_handle_wakeup_event(struct efhw_nic *nic, unsigned id); + +/*! Timeout handler */ +extern void efrm_handle_timeout_event(struct efhw_nic *nic, unsigned id); + +/*! DMA flush handler */ +extern void efrm_handle_dmaq_flushed(struct efhw_nic *nic, unsigned id, + int rx_flush); + +/*! SRAM update handler */ +extern void efrm_handle_sram_event(struct efhw_nic *nic); + +#endif /* __CI_EFRM_VI_RESOURCE_PRIVATE_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/vi_resource.h @@ -0,0 +1,157 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains public API for VI resource. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_VI_RESOURCE_H__
+#define __CI_EFRM_VI_RESOURCE_H__
+
+#include
+#include
+#include
+
+struct vi_resource;
+
+/* Make these inline instead of macros for type checking */
+static inline struct vi_resource *
+efrm_to_vi_resource(struct efrm_resource *rs)
+{
+	EFRM_ASSERT(EFRM_RESOURCE_TYPE(rs->rs_handle) == EFRM_RESOURCE_VI);
+	return (struct vi_resource *) rs;
+}
+static inline struct
+efrm_resource *efrm_from_vi_resource(struct vi_resource *rs)
+{
+	return (struct efrm_resource *)rs;
+}
+
+#define EFAB_VI_RESOURCE_INSTANCE(virs) \
+	EFRM_RESOURCE_INSTANCE(efrm_from_vi_resource(virs)->rs_handle)
+
+#define EFAB_VI_RESOURCE_PRI_ARG(virs) \
+	EFRM_RESOURCE_PRI_ARG(efrm_from_vi_resource(virs)->rs_handle)
+
+extern int
+efrm_vi_resource_alloc(struct efrm_client *client,
+		       struct vi_resource *evq_virs,
+		       uint16_t vi_flags, int32_t evq_capacity,
+		       int32_t txq_capacity, int32_t rxq_capacity,
+		       uint8_t tx_q_tag, uint8_t rx_q_tag,
+		       struct vi_resource **virs_in_out,
+		       uint32_t *out_io_mmap_bytes,
+		       uint32_t *out_mem_mmap_bytes,
+		       uint32_t *out_txq_capacity,
+		       uint32_t *out_rxq_capacity);
+
+extern void efrm_vi_resource_free(struct vi_resource *);
+extern void efrm_vi_resource_release(struct vi_resource *);
+
+
+/*--------------------------------------------------------------------
+ *
+ * eventq handling
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Reset an event queue and clear any associated timers */
+extern void efrm_eventq_reset(struct vi_resource *virs);
+
+/*! Register a kernel-level handler for the event queue.  This function is
+ * called whenever a timer expires, or whenever the event queue is woken
+ * but no thread is blocked on it.
+ *
+ * This function returns -EBUSY if a callback is already installed.
+ *
+ * \param rs      Event-queue resource
+ * \param handler Callback-handler
+ * \param arg     Argument to pass to callback-handler
+ * \return        Status code
+ */
+extern int
+efrm_eventq_register_callback(struct vi_resource *rs,
+			      void (*handler)(void *arg, int is_timeout,
+					      struct efhw_nic *nic),
+			      void *arg);
+
+/*! Kill the kernel-level callback.
+ *
+ * This function stops the timer from running and unregisters the callback
+ * function.  It waits for any running timeout handlers to complete before
+ * returning.
+ *
+ * \param rs      Event-queue resource
+ * \return        Nothing
+ */
+extern void efrm_eventq_kill_callback(struct vi_resource *rs);
+
+/*! Ask the NIC to generate a wakeup when an event is next delivered. */
+extern void efrm_eventq_request_wakeup(struct vi_resource *rs,
+				       unsigned current_ptr);
+
+/*! Register a kernel-level handler for flush completions.
+ * \TODO Currently, it is unsafe to install a callback more than once.
+ *
+ * \param rs      VI resource being flushed.
+ * \param handler Callback handler function.
+ * \param arg     Argument to be passed to handler.
+ */
+extern void
+efrm_vi_register_flush_callback(struct vi_resource *rs,
+				void (*handler)(void *),
+				void *arg);
+
+int efrm_vi_resource_flush_retry(struct vi_resource *virs);
+
+/*! Initiate a flush of the VI's DMA queues; completion is signalled
+ * through the callback registered with efrm_vi_register_flush_callback() */
+extern int efrm_pt_flush(struct vi_resource *);
+
+/*! Set the pace value for the VI's TX DMA queue
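A usage sketch for the event-queue callback API declared above (editor's illustration; the handler body is a placeholder):

static void example_evq_handler(void *arg, int is_timeout,
				struct efhw_nic *nic)
{
	/* runs on wakeup (is_timeout == 0) or on timer expiry */
}

static int example_watch_evq(struct vi_resource *virs)
{
	int rc = efrm_eventq_register_callback(virs, example_evq_handler,
					       NULL);
	if (rc < 0)	/* -EBUSY if a callback is already installed */
		return rc;
	/* ... later, when done ... */
	efrm_eventq_kill_callback(virs);
	return 0;
}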
 */
+extern int efrm_pt_pace(struct vi_resource *, unsigned int val);
+
+uint32_t efrm_vi_rm_txq_bytes(struct vi_resource *virs
+			      /*,struct efhw_nic *nic */);
+uint32_t efrm_vi_rm_rxq_bytes(struct vi_resource *virs
+			      /*,struct efhw_nic *nic */);
+uint32_t efrm_vi_rm_evq_bytes(struct vi_resource *virs
+			      /*,struct efhw_nic *nic */);
+
+
+/* Fill [out_vi_data] with information required to allow a VI to be init'd.
+ * [out_vi_data] must ref at least VI_MAPPINGS_SIZE bytes.
+ */
+extern void efrm_vi_resource_mappings(struct vi_resource *, void *out_vi_data);
+
+
+#endif /* __CI_EFRM_VI_RESOURCE_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/efrm_nic.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/efrm_nic.h
@@ -0,0 +1,26 @@
+#ifndef __EFRM_NIC_H__
+#define __EFRM_NIC_H__
+
+#include
+
+
+struct efrm_nic_per_vi {
+	unsigned long state;
+	struct vi_resource *vi;
+};
+
+
+struct efrm_nic {
+	struct efhw_nic efhw_nic;
+	struct list_head link;
+	struct list_head clients;
+	struct efrm_nic_per_vi *vis;
+};
+
+
+#define efrm_nic(_efhw_nic) \
+	container_of(_efhw_nic, struct efrm_nic, efhw_nic)
+
+
+
+#endif /* __EFRM_NIC_H__ */
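An editor's note on the efrm_nic() macro above: struct efhw_nic is embedded by value, so container_of() recovers the outer structure without any lookup table; a trivial sketch:

static struct efrm_nic *example_to_efrm_nic(struct efhw_nic *efhw_nic)
{
	/* valid for any efhw_nic that is embedded in an efrm_nic */
	return efrm_nic(efhw_nic);
}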
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/driver_private.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/driver_private.h
@@ -0,0 +1,89 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the private API of the efrm library to be used from
+ * the SFC resource driver.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_DRIVER_PRIVATE_H__
+#define __CI_EFRM_DRIVER_PRIVATE_H__
+
+#include
+#include
+
+/*--------------------------------------------------------------------
+ *
+ * global variables
+ *
+ *--------------------------------------------------------------------*/
+
+/* Internal structure for resource driver */
+extern struct efrm_resource_manager *efrm_rm_table[];
+
+/*--------------------------------------------------------------------
+ *
+ * efrm_nic_table handling
+ *
+ *--------------------------------------------------------------------*/
+
+struct efrm_nic;
+
+extern void efrm_driver_ctor(void);
+extern void efrm_driver_dtor(void);
+extern int efrm_driver_register_nic(struct efrm_nic *, int nic_index,
+				    int ifindex);
+extern int efrm_driver_unregister_nic(struct efrm_nic *);
+
+/*--------------------------------------------------------------------
+ *
+ * create/destroy resource managers
+ *
+ *--------------------------------------------------------------------*/
+
+struct vi_resource_dimensions {
+	unsigned evq_int_min, evq_int_lim;
+	unsigned evq_timer_min, evq_timer_lim;
+	unsigned rxq_min, rxq_lim;
+	unsigned txq_min, txq_lim;
+};
+
+/*! Initialise resources */
+extern int
+efrm_resources_init(const struct vi_resource_dimensions *,
+		    int buffer_table_min, int buffer_table_lim);
+
+/*! Tear down resources */
+extern void efrm_resources_fini(void);
+
+#endif /* __CI_EFRM_DRIVER_PRIVATE_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/resource_id.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/resource_id.h
@@ -0,0 +1,104 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the public type and definitions for resource handles,
+ * and the definitions of the resource types.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_DRIVER_EFRM_RESOURCE_ID_H__
+#define __CI_DRIVER_EFRM_RESOURCE_ID_H__
+
+/***********************************************************************
+ * Resource handles
+ *
+ * Resource handles are intended for identifying resources at kernel
+ * level, within the context of a particular NIC, particularly because
+ * for some resource types the low 16 bits correspond to hardware IDs.
+ * They were historically also used at user level to identify resources
+ * within the context of a file descriptor, with a nonce stored in bits
+ * 16 to 27 (inclusive), but that approach is deprecated (though still
+ * alive!).
+ *
+ * The handle value 0 is used to mean "no resource".
+ ***********************************************************************/
+
+typedef struct {
+	uint32_t handle;
+} efrm_resource_handle_t;
+
+/* You may think the following functions should all have
+ * _HANDLE_ in their names, but really we are providing an abstract set
+ * of methods on a (hypothetical) efrm_resource_t object, with
+ * efrm_resource_handle_t being just the reference one holds to access
+ * the object (aka "this" or "self").
+ */
+
+/* Below I use inline instead of macros where possible in order to get
+ * more type checking help from the compiler; hopefully we'll never
+ * have to rewrite these to use #define as we've found some horrible
+ * compiler on which we cannot make static inline do the Right Thing (tm).
+ *
+ * For consistency and to avoid pointless change I spell these
+ * routines as macro names (CAPITALIZED_UNDERSCORED), which also serves
+ * to remind people they are compact and inlined.
+ */
+
+#define EFRM_RESOURCE_FMT "[rs:%08x]"
+
+static inline unsigned EFRM_RESOURCE_PRI_ARG(efrm_resource_handle_t h)
+{
+	return h.handle;
+}
+
+static inline unsigned EFRM_RESOURCE_INSTANCE(efrm_resource_handle_t h)
+{
+	return h.handle & 0x0000ffff;
+}
+
+static inline unsigned EFRM_RESOURCE_TYPE(efrm_resource_handle_t h)
+{
+	return (h.handle & 0xf0000000) >> 28;
+}
+
+/***********************************************************************
+ * Resource type codes
+ ***********************************************************************/
+
+#define EFRM_RESOURCE_IOBUFSET 0x0
+#define EFRM_RESOURCE_VI 0x1
+#define EFRM_RESOURCE_FILTER 0x2
+#define EFRM_RESOURCE_NUM 0x3	/* This isn't a resource! */
+
+#define EFRM_RESOURCE_NAME(type) \
+	((type) == EFRM_RESOURCE_IOBUFSET?	"IOBUFSET" : \
+	 (type) == EFRM_RESOURCE_VI?		"VI" : \
+	 (type) == EFRM_RESOURCE_FILTER?	"FILTER" : \
+						"<invalid>")
+
+#endif /* __CI_DRIVER_EFRM_RESOURCE_ID_H__ */
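An editor's sketch of pulling a handle apart with the accessors above:

static void example_describe_handle(efrm_resource_handle_t h)
{
	printk(KERN_DEBUG EFRM_RESOURCE_FMT " type=%s instance=%u\n",
	       EFRM_RESOURCE_PRI_ARG(h),
	       EFRM_RESOURCE_NAME(EFRM_RESOURCE_TYPE(h)),
	       EFRM_RESOURCE_INSTANCE(h));
}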
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/sysdep_linux.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/sysdep_linux.h
@@ -0,0 +1,93 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides a version-independent Linux kernel API for the efrm
+ * library.
+ * Only kernels >=2.6.9 are supported.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Kfifo API is partially stolen from linux-2.6.22/include/linux/kfifo.h
+ * Copyright (C) 2004 Stelian Pop
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_SYSDEP_LINUX_H__
+#define __CI_EFRM_SYSDEP_LINUX_H__
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/********************************************************************
+ *
+ * List API
+ *
+ ********************************************************************/
+
+static inline struct list_head *list_pop(struct list_head *list)
+{
+	struct list_head *link = list->next;
+	list_del(link);
+	return link;
+}
+
+static inline struct list_head *list_pop_tail(struct list_head *list)
+{
+	struct list_head *link = list->prev;
+	list_del(link);
+	return link;
+}
+
+/********************************************************************
+ *
+ * Kfifo API
+ *
+ ********************************************************************/
+
+static inline void kfifo_vfree(struct kfifo *fifo)
+{
+	vfree(fifo->buffer);
+	kfree(fifo);
+}
+
+#endif /* __CI_EFRM_SYSDEP_LINUX_H__ */
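A usage sketch (editor's illustration) for list_pop() above; it unlinks unconditionally, so the caller must hold the lock protecting the list and check for emptiness first:

static struct list_head *example_dequeue(struct list_head *queue)
{
	if (list_empty(queue))
		return NULL;
	return list_pop(queue);	/* oldest entry first: FIFO order */
}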
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/nic_set.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/nic_set.h
@@ -0,0 +1,104 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the public API for NIC sets.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_NIC_SET_H__
+#define __CI_EFRM_NIC_SET_H__
+
+#include
+#include
+#include
+
+/*--------------------------------------------------------------------
+ *
+ * efrm_nic_set_t - tracks which NICs something has been done on
+ *
+ *--------------------------------------------------------------------*/
+
+/* The internal structure of efrm_nic_set_t should not be referenced outside
+ * of this file.  Add a new accessor if you need one. */
+typedef struct {
+	uint32_t nics;
+} efrm_nic_set_t;
+
+#if EFHW_MAX_NR_DEVS > 32
+#error change efrm_nic_set to handle EFHW_MAX_NR_DEVS number of devices
+#endif
+
+static inline bool
+efrm_nic_set_read(const efrm_nic_set_t *nic_set, unsigned index)
+{
+	EFRM_ASSERT(nic_set);
+	EFRM_ASSERT(index < EFHW_MAX_NR_DEVS && index < 32);
+	return (nic_set->nics & (1 << index)) ? true : false;
+}
+
+static inline void
+efrm_nic_set_write(efrm_nic_set_t *nic_set, unsigned index, bool value)
+{
+	EFRM_ASSERT(nic_set);
+	EFRM_ASSERT(index < EFHW_MAX_NR_DEVS && index < 32);
+	EFRM_ASSERT(value == false || value == true);
+	nic_set->nics = (nic_set->nics & (~(1 << index))) + (value << index);
+}
+
+static inline void efrm_nic_set_clear(efrm_nic_set_t *nic_set)
+{
+	nic_set->nics = 0;
+}
+
+static inline void efrm_nic_set_all(efrm_nic_set_t *nic_set)
+{
+	nic_set->nics = 0xffffffff;
+}
+
+static inline bool efrm_nic_set_is_all_clear(efrm_nic_set_t *nic_set)
+{
+	return nic_set->nics == 0 ? true : false;
+}
+
+#define EFRM_NIC_SET_FMT "%x"
+
+static inline uint32_t efrm_nic_set_pri_arg(efrm_nic_set_t *nic_set)
+{
+	return nic_set->nics;
+}
+
+#define EFRM_FOR_EACH_NIC_INDEX_IN_SET(_set, _nic_i) \
+	for ((_nic_i) = 0; (_nic_i) < EFHW_MAX_NR_DEVS; ++(_nic_i)) \
+		if (efrm_nic_set_read((_set), (_nic_i)))
+
+#endif /* __CI_EFRM_NIC_SET_H__ */
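An editor's sketch of the set operations and the iteration macro above:

static void example_walk_nic_set(void)
{
	efrm_nic_set_t set;
	int nic_i;

	efrm_nic_set_clear(&set);
	efrm_nic_set_write(&set, 0, true);	/* mark NIC 0 */
	efrm_nic_set_write(&set, 2, true);	/* mark NIC 2 */

	EFRM_FOR_EACH_NIC_INDEX_IN_SET(&set, nic_i)
		printk(KERN_DEBUG "NIC %d is in the set\n", nic_i);
}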
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/buddy.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/buddy.h
@@ -0,0 +1,68 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides a private API for the buddy allocator.  This API is
+ * not designed for use outside of the SFC resource driver.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_BUDDY_H__
+#define __CI_EFRM_BUDDY_H__
+
+#include
+
+/*! State of a simple buddy allocator over a 2^order-sized space. */
+struct efrm_buddy_allocator {
+	struct list_head *free_lists;	/* array[order+1] */
+	struct list_head *links;	/* array[1<<order] */
+	uint8_t *orders;		/* array[1<<order] */
+	unsigned order;			/*!< total size == (1 << order) */
+};
+
+/*! Returns the total size of the managed space. */
+static inline unsigned long efrm_buddy_size(struct efrm_buddy_allocator *b)
+{
+	return 1ul << b->order;
+}
+
+int efrm_buddy_ctor(struct efrm_buddy_allocator *b, unsigned order);
+void efrm_buddy_dtor(struct efrm_buddy_allocator *b);
+int efrm_buddy_alloc(struct efrm_buddy_allocator *b, unsigned order);
+void efrm_buddy_free(struct efrm_buddy_allocator *b, unsigned addr,
+		     unsigned order);
+
+
+#endif /* __CI_EFRM_BUDDY_H__ */
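A usage sketch for the buddy-allocator API above (editor's illustration; it assumes efrm_buddy_alloc() returns the base address of the allocated block, or a negative value on failure):

static int example_use_buddy(struct efrm_buddy_allocator *b)
{
	int addr;

	if (efrm_buddy_ctor(b, 10) < 0)		/* manage 2^10 units */
		return -ENOMEM;
	addr = efrm_buddy_alloc(b, 4);		/* grab a 2^4-unit block */
	if (addr >= 0)
		efrm_buddy_free(b, addr, 4);	/* order must match alloc */
	efrm_buddy_dtor(b);
	return 0;
}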
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/private.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/private.h
@@ -0,0 +1,118 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the private API of the efrm library -- resource
+ * handling.  This API is not designed for use outside of the SFC
+ * resource driver.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_PRIVATE_H__
+#define __CI_EFRM_PRIVATE_H__
+
+#include
+#include
+#include
+#include
+
+/*--------------------------------------------------------------------
+ *
+ * create resource managers
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Create a resource manager for various types of resources
+ */
+extern int
+efrm_create_iobufset_resource_manager(struct efrm_resource_manager **out);
+
+extern int
+efrm_create_filter_resource_manager(struct efrm_resource_manager **out);
+
+extern int
+efrm_create_vi_resource_manager(struct efrm_resource_manager **out,
+				const struct vi_resource_dimensions *);
+
+
+/*--------------------------------------------------------------------
+ *
+ * Instance pool management
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Allocate instance pool.  Use kfifo_vfree to destroy it. */
+static inline int
+efrm_kfifo_id_ctor(struct kfifo **ids_out,
+		   unsigned int base, unsigned int limit, spinlock_t *lock)
+{
+	unsigned int i;
+	struct kfifo *ids;
+	unsigned char *buffer;
+	unsigned int size = roundup_pow_of_two((limit - base) * sizeof(int));
+	EFRM_ASSERT(base <= limit);
+	buffer = vmalloc(size);
+	if (buffer == NULL)	/* don't hand a NULL buffer to kfifo_init */
+		return -ENOMEM;
+	ids = kfifo_init(buffer, size, GFP_KERNEL, lock);
+	if (IS_ERR(ids)) {
+		vfree(buffer);	/* don't leak the buffer on failure */
+		return PTR_ERR(ids);
+	}
+	for (i = base; i < limit; i++)
+		EFRM_VERIFY_EQ(__kfifo_put(ids, (unsigned char *)&i,
+					   sizeof(i)), sizeof(i));
+
+	*ids_out = ids;
+	return 0;
+}
+
+/*--------------------------------------------------------------------
+ *
+ * Various private functions
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Initialize the fields in the provided resource manager memory area
+ *   \param rm      The area of memory to be initialized
+ *   \param dtor    A method to destroy the resource manager
+ *   \param name    A textual name for the resource manager
+ *   \param type    The type of resource managed
+ */
+extern int
+efrm_resource_manager_ctor(struct efrm_resource_manager *rm,
+			   void (*dtor)(struct efrm_resource_manager *),
+			   const char *name, unsigned type);
+
+extern void efrm_resource_manager_dtor(struct efrm_resource_manager *rm);
+
+
+#endif /* __CI_EFRM_PRIVATE_H__ */
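An editor's sketch of how an ID pool built with efrm_kfifo_id_ctor() above is drained and refilled using the 2.6-era kfifo primitives:

static int example_get_id(struct kfifo *ids, unsigned int *id_out)
{
	/* 'ids' was created by efrm_kfifo_id_ctor(&ids, base, limit, &lock) */
	if (__kfifo_get(ids, (unsigned char *)id_out, sizeof(*id_out)) !=
	    sizeof(*id_out))
		return -ENOSPC;	/* pool exhausted */
	return 0;
}

static void example_put_id(struct kfifo *ids, unsigned int id)
{
	__kfifo_put(ids, (unsigned char *)&id, sizeof(id));
}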
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/resource.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/resource.h
@@ -0,0 +1,119 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ * resource management for Xen backend, OpenOnload, etc
+ * (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the public interface of the efrm library -- resource
+ * handling.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ * 9501 Jeronimo Road, Suite 250,
+ * Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ * Alexandra Kossovsky
+ * OKTET Labs Ltd, Russia,
+ * http://oktetlabs.ru,
+ * by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_RESOURCE_H__
+#define __CI_EFRM_RESOURCE_H__
+
+/*--------------------------------------------------------------------
+ *
+ * headers for type dependencies
+ *
+ *--------------------------------------------------------------------*/
+
+#include
+#include
+#include
+#include
+
+#ifndef __ci_driver__
+#error "Driver-only file"
+#endif
+
+/*--------------------------------------------------------------------
+ *
+ * struct efrm_resource - represents an allocated resource
+ *                   (eg. pinned pages of memory, or resource on a NIC)
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Representation of an allocated resource */
+struct efrm_resource {
+	int rs_ref_count;
+	efrm_resource_handle_t rs_handle;
+	struct efrm_client *rs_client;
+	struct list_head rs_client_link;
+	struct list_head rs_manager_link;
+};
+
+/*--------------------------------------------------------------------
+ *
+ * managed resource abstraction
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Factory for resources of a specific type */
+struct efrm_resource_manager {
+	const char *rm_name;	/*!< human readable only */
+	spinlock_t rm_lock;
+#ifndef NDEBUG
+	unsigned rm_type;
+#endif
+	int rm_resources;
+	int rm_resources_hiwat;
+	struct list_head rm_resources_list;
+	/**
+	 * Destructor for the resource manager.  Other resource managers
+	 * might already be dead, although the system guarantees that
+	 * managers are destructed in the order in which they were created.
+	 */
+	void (*rm_dtor)(struct efrm_resource_manager *);
+};
+
+#ifdef NDEBUG
+# define EFRM_RESOURCE_ASSERT_VALID(rs, rc_mbz)
+# define EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm)
+#else
+/*! Check validity of resource and report on failure */
+extern void efrm_resource_assert_valid(struct efrm_resource *,
+				       int rc_may_be_zero,
+				       const char *file, int line);
+# define EFRM_RESOURCE_ASSERT_VALID(rs, rc_mbz) \
+	efrm_resource_assert_valid((rs), (rc_mbz), __FILE__, __LINE__)
+
+/*! Check validity of resource manager and report on failure */
+extern void efrm_resource_manager_assert_valid(struct efrm_resource_manager *,
+					       const char *file, int line);
+# define EFRM_RESOURCE_MANAGER_ASSERT_VALID(rm) \
+	efrm_resource_manager_assert_valid((rm), __FILE__, __LINE__)
+#endif
+
+
+extern void efrm_resource_ref(struct efrm_resource *rs);
+extern int __efrm_resource_release(struct efrm_resource *);
+
+
+#endif /* __CI_EFRM_RESOURCE_H__ */
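An editor's sketch of the reference-counting discipline implied by efrm_resource_ref() and __efrm_resource_release() above, assuming (as the type-specific release helpers in this driver suggest) that a non-zero return from release means the last reference was dropped:

static void example_borrow_resource(struct efrm_resource *rs)
{
	efrm_resource_ref(rs);			/* take a reference */
	EFRM_RESOURCE_ASSERT_VALID(rs, 0);	/* ref count must be > 0 */
	/* ... use the resource ... */
	if (__efrm_resource_release(rs)) {
		/* last reference gone: the caller is now responsible for
		 * freeing via the type-specific free routine */
	}
}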
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_BUFFER_TABLE_H__
+#define __CI_EFRM_BUFFER_TABLE_H__
+
+#include
+
+/*--------------------------------------------------------------------
+ *
+ * NIC's buffer table.
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Managed interface. */
+
+/*! construct a managed buffer table object, allocated over a region of
+ * the NIC's buffer table space
+ */
+extern int efrm_buffer_table_ctor(unsigned low, unsigned high);
+/*! destructor for above */
+extern void efrm_buffer_table_dtor(void);
+
+/*! allocate a contiguous region of buffer table space */
+extern int efrm_buffer_table_alloc(unsigned order,
+				   struct efhw_buffer_table_allocation *a);
+
+
+/*--------------------------------------------------------------------
+ *
+ * buffer table operations through the HW independent API
+ *
+ *--------------------------------------------------------------------*/
+
+/*! free a previously allocated region of buffer table space */
+extern void efrm_buffer_table_free(struct efhw_buffer_table_allocation *a);
+
+/*! commit the update of a buffer table entry to every NIC */
+extern void efrm_buffer_table_commit(void);
+
+/*! set one entry within an allocated region for the given NIC */
+extern void efrm_buffer_table_set(struct efhw_buffer_table_allocation *,
+				  struct efhw_nic *,
+				  unsigned i, dma_addr_t dma_addr, int owner);
+
+
+#endif /* __CI_EFRM_BUFFER_TABLE_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/sysdep.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/sysdep.h
@@ -0,0 +1,46 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides a Linux-like, system-independent API for the efrm
+ * library.
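A sketch of the calling sequence the buffer-table API above suggests (illustrative only; the NIC, DMA addresses and owner ID are placeholders): reserve a run of 2^order entries, program each one, then commit before the NIC may use them.

static int example_map_pages(struct efhw_nic *nic, dma_addr_t *dma_addrs,
			     unsigned order, int owner,
			     struct efhw_buffer_table_allocation *a)
{
	unsigned i;
	int rc;

	rc = efrm_buffer_table_alloc(order, a);	/* 2^order entries */
	if (rc < 0)
		return rc;
	for (i = 0; i < (1u << order); ++i)
		efrm_buffer_table_set(a, nic, i, dma_addrs[i], owner);
	efrm_buffer_table_commit();	/* flush pending updates to the NIC */
	return 0;
}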
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_SYSDEP_H__
+#define __CI_EFRM_SYSDEP_H__
+
+/* Spinlocks are defined in efhw/sysdep.h */
+#include
+
+#include
+
+#endif /* __CI_EFRM_SYSDEP_H__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/debug.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/debug.h
@@ -0,0 +1,78 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the debug-related API for the efrm library, using
+ * Linux kernel primitives.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_DEBUG_LINUX_H__
+#define __CI_EFRM_DEBUG_LINUX_H__
+
+#define EFRM_PRINTK_PREFIX "[sfc efrm] "
+
+#define EFRM_PRINTK(level, fmt, ...) \
+	printk(level EFRM_PRINTK_PREFIX fmt "\n", __VA_ARGS__)
+
+/* The following macros should be used with non-zero format parameters
+ * due to __VA_ARGS__ limitations.  Use "%s" with __func__ if you can't
+ * find better parameters. */
+#define EFRM_ERR(fmt, ...)     EFRM_PRINTK(KERN_ERR, fmt, __VA_ARGS__)
+#define EFRM_WARN(fmt, ...)    EFRM_PRINTK(KERN_WARNING, fmt, __VA_ARGS__)
+#define EFRM_NOTICE(fmt, ...)
EFRM_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__) +#if !defined(NDEBUG) +#define EFRM_TRACE(fmt, ...) EFRM_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__) +#else +#define EFRM_TRACE(fmt, ...) +#endif + +#ifndef NDEBUG +#define EFRM_ASSERT(cond) BUG_ON((cond) == 0) +#define _EFRM_ASSERT(cond, file, line) \ + do { \ + if (unlikely(!(cond))) { \ + EFRM_ERR("assertion \"%s\" failed at %s %d", \ + #cond, file, line); \ + BUG(); \ + } \ + } while (0) + +#define EFRM_DO_DEBUG(expr) expr +#define EFRM_VERIFY_EQ(expr, val) EFRM_ASSERT((expr) == (val)) +#else +#define EFRM_ASSERT(cond) +#define EFRM_DO_DEBUG(expr) +#define EFRM_VERIFY_EQ(expr, val) expr +#endif + +#endif /* __CI_EFRM_DEBUG_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/filter.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/filter.h @@ -0,0 +1,122 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides public API for filter resource. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFRM_FILTER_H__ +#define __CI_EFRM_FILTER_H__ + +#include +#include + + +struct filter_resource; +struct vi_resource; +struct efrm_client; + + +/*! + * Allocate filter resource. + * + * \param vi_parent VI resource to use as parent. The function takes + * reference to the VI resource on success. + * \param frs_out pointer to return the new filter resource + * + * \return status code; if non-zero, frs_out is unchanged + */ +extern int +efrm_filter_resource_alloc(struct vi_resource *vi_parent, + struct filter_resource **frs_out); + +extern void +efrm_filter_resource_release(struct filter_resource *); + + +extern int efrm_filter_resource_clear(struct filter_resource *frs); + +extern int __efrm_filter_resource_set(struct filter_resource *frs, int type, + unsigned saddr_be32, uint16_t sport_be16, + unsigned daddr_be32, uint16_t dport_be16); + +static inline int +efrm_filter_resource_tcp_set(struct filter_resource *frs, + unsigned saddr, uint16_t sport, + unsigned daddr, uint16_t dport) +{ + int type; + + EFRM_ASSERT((saddr && sport) || (!saddr && !sport)); + + type = + saddr ? 
EFHW_IP_FILTER_TYPE_TCP_FULL :
+	    EFHW_IP_FILTER_TYPE_TCP_WILDCARD;
+
+	return __efrm_filter_resource_set(frs, type,
+					  saddr, sport, daddr, dport);
+}
+
+static inline int
+efrm_filter_resource_udp_set(struct filter_resource *frs,
+			     unsigned saddr, uint16_t sport,
+			     unsigned daddr, uint16_t dport)
+{
+	int type;
+
+	EFRM_ASSERT((saddr && sport) || (!saddr && !sport));
+
+	type =
+	    saddr ? EFHW_IP_FILTER_TYPE_UDP_FULL :
+	    EFHW_IP_FILTER_TYPE_UDP_WILDCARD;
+
+	return __efrm_filter_resource_set(frs,
+					  type, saddr, sport, daddr, dport);
+}
+
+
+extern int
+efrm_filter_resource_instance(struct filter_resource *);
+
+extern struct efrm_resource *
+efrm_filter_resource_to_resource(struct filter_resource *);
+
+extern struct filter_resource *
+efrm_filter_resource_from_resource(struct efrm_resource *);
+
+extern void
+efrm_filter_resource_free(struct filter_resource *);
+
+
+#endif /* __CI_EFRM_FILTER_H__ */
+/*! \cidoxg_end */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/efrm/nic_table.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/efrm/nic_table.h
@@ -0,0 +1,98 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the public API for the NIC table.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFRM_NIC_TABLE_H__
+#define __CI_EFRM_NIC_TABLE_H__
+
+#include
+#include
+
+/*--------------------------------------------------------------------
+ *
+ * struct efrm_nic_table - top level driver object keeping all NICs -
+ * implemented in driver_object.c
+ *
+ *--------------------------------------------------------------------*/
+
+/*! Top-level driver object that keeps track of all attached NICs */
+struct efrm_nic_table {
+	/*! nics attached to this driver */
+	struct efhw_nic *nic[EFHW_MAX_NR_DEVS];
+	/*! pointer to an arbitrary struct efhw_nic if one exists;
+	 * for code which does not care which NIC it wants but
+	 * still needs one.  Note you cannot assume nic[0] exists.
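To make the full-match/wildcard split in the filter helpers above concrete, an illustrative caller (addresses and ports are placeholders; a real caller would obtain frs from efrm_filter_resource_alloc()):

static int example_tcp_wildcard(struct filter_resource *frs)
{
	/* saddr == 0 and sport == 0 selects EFHW_IP_FILTER_TYPE_TCP_WILDCARD:
	 * any remote peer talking to 10.0.0.1:80 matches. */
	return efrm_filter_resource_tcp_set(frs, 0, 0,
					    htonl(0x0a000001), htons(80));
}

Passing a non-zero source address and port instead selects EFHW_IP_FILTER_TYPE_TCP_FULL, i.e. an exact match on all four tuple fields.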
*/ + struct efhw_nic *a_nic; + uint32_t nic_count; /*!< number of nics attached to this driver */ + spinlock_t lock; /*!< lock for table modifications */ + atomic_t ref_count; /*!< refcount for users of nic table */ +}; + +/* Resource driver structures used by other drivers as well */ +extern struct efrm_nic_table *efrm_nic_tablep; + +static inline void efrm_nic_table_hold(void) +{ + atomic_inc(&efrm_nic_tablep->ref_count); +} + +static inline void efrm_nic_table_rele(void) +{ + atomic_dec(&efrm_nic_tablep->ref_count); +} + +static inline int efrm_nic_table_held(void) +{ + return atomic_read(&efrm_nic_tablep->ref_count) != 0; +} + +/* Run code block _x multiple times with variable nic set to each + * registered NIC in turn. + * DO NOT "break" out of this loop early. */ +#define EFRM_FOR_EACH_NIC(_nic_i, _nic) \ + for ((_nic_i) = (efrm_nic_table_hold(), 0); \ + (_nic_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \ + (_nic_i)++) \ + if (((_nic) = efrm_nic_tablep->nic[_nic_i])) + +#define EFRM_FOR_EACH_NIC_IN_SET(_set, _i, _nic) \ + for ((_i) = (efrm_nic_table_hold(), 0); \ + (_i) < EFHW_MAX_NR_DEVS || (efrm_nic_table_rele(), 0); \ + ++(_i)) \ + if (((_nic) = efrm_nic_tablep->nic[_i]) && \ + efrm_nic_set_read((_set), (_i))) + +#endif /* __CI_EFRM_NIC_TABLE_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/resource/linux_efhw_nic.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/resource/linux_efhw_nic.h @@ -0,0 +1,69 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file contains definition of the public type struct linux_efhw_nic. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_DRIVER_RESOURCE_LINUX_RESOURCE__ +#define __CI_DRIVER_RESOURCE_LINUX_RESOURCE__ + +#include +#include + + +/************************************************************************ + * Per-nic structure in the resource driver * + ************************************************************************/ + +struct linux_efhw_nic { + struct efrm_nic efrm_nic; + + struct pci_dev *pci_dev; /*!< pci descriptor */ + struct tasklet_struct tasklet; /*!< for interrupt bottom half */ + + /* Physical addresses of the control aperture bar. */ + unsigned long ctr_ap_pci_addr; + + /*! 
Callbacks for driverlink, when needed. */
+	struct efx_dl_callbacks *dl_callbacks;
+
+	/*! Event handlers. */
+	struct efhw_ev_handler *ev_handlers;
+
+};
+
+#define linux_efhw_nic(_efhw_nic) \
+	container_of(_efhw_nic, struct linux_efhw_nic, efrm_nic.efhw_nic)
+
+#endif /* __CI_DRIVER_RESOURCE_LINUX_RESOURCE__ */
--- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/resource/efx_vi.h
+++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/resource/efx_vi.h
@@ -0,0 +1,273 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains the public EFX VI API to the Solarflare resource
+ * manager.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_DRIVER_RESOURCE_EFX_VI_H__
+#define __CI_DRIVER_RESOURCE_EFX_VI_H__
+
+/* Default size of event queue in the efx_vi resource.  Copied from
+ * CI_CFG_NETIF_EVENTQ_SIZE */
+#define EFX_VI_EVENTQ_SIZE_DEFAULT 1024
+
+extern int efx_vi_eventq_size;
+
+/**************************************************************************
+ * efx_vi_state types, allocation and free
+ **************************************************************************/
+
+/*! Handle for referring to an efx_vi */
+struct efx_vi_state;
+
+/*!
+ * Allocate an efx_vi, including event queue and pt_endpoint
+ *
+ * \param vih_out Pointer to a handle that is set on success
+ * \param ifindex Index of the network interface desired
+ * \return Zero on success (and vih_out set), non-zero on failure.
+ */
+extern int
+efx_vi_alloc(struct efx_vi_state **vih_out, int ifindex);
+
+/*!
+ * Free a previously allocated efx_vi
+ *
+ * \param vih The handle of the efx_vi to free
+ */
+extern void
+efx_vi_free(struct efx_vi_state *vih);
+
+/*!
+ * Reset a previously allocated efx_vi
+ *
+ * \param vih The handle of the efx_vi to reset
+ */
+extern void
+efx_vi_reset(struct efx_vi_state *vih);
+
+/**************************************************************************
+ * efx_vi_eventq types and functions
+ **************************************************************************/
+
+/*!
+ * Register a function to receive callbacks when event queue timeouts
+ * or wakeups occur.  Only one function per efx_vi can be registered
+ * at once.
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param callback The function to callback
+ * \param context An argument to pass to the callback function
+ * \return Zero on success, non-zero on failure.
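For the EFRM_FOR_EACH_NIC macro defined in nic_table.h above, an illustrative walk over the NIC table (the function is not from the source). The macro itself takes and drops the table reference, so the body must run to completion rather than break out early:

static int example_count_nics(void)
{
	int nic_i, n = 0;
	struct efhw_nic *nic;

	EFRM_FOR_EACH_NIC(nic_i, nic)
		++n;	/* body executes once per non-NULL nic[] slot */
	return n;
}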
+ */
+extern int
+efx_vi_eventq_register_callback(struct efx_vi_state *vih,
+				void (*callback)(void *context, int is_timeout),
+				void *context);
+
+/*!
+ * Remove the current eventq timeout or wakeup callback function
+ *
+ * \param vih The handle to identify the efx_vi
+ * \return Zero on success, non-zero on failure
+ */
+extern int
+efx_vi_eventq_kill_callback(struct efx_vi_state *vih);
+
+/**************************************************************************
+ * efx_vi_dma_map types and functions
+ **************************************************************************/
+
+/*!
+ * Handle for referring to a DMA mapping made for an efx_vi
+ */
+struct efx_vi_dma_map_state;
+
+/*!
+ * Map a list of buffer pages so they are registered with the hardware
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param pages An array of page pointers to map
+ * \param n_pages Length of the page pointer array.  Must be a power of two.
+ * \param dmh_out Set on success to a handle used to refer to this mapping
+ * \return Zero on success, non-zero on failure.
+ */
+extern int
+efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages,
+		     int n_pages, struct efx_vi_dma_map_state **dmh_out);
+extern int
+efx_vi_dma_map_addrs(struct efx_vi_state *vih,
+		     unsigned long long *dev_bus_addrs, int n_pages,
+		     struct efx_vi_dma_map_state **dmh_out);
+
+/*!
+ * Unmap a previously mapped set of pages so they are no longer registered
+ * with the hardware.
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param dmh The handle to identify the dma mapping
+ */
+extern void
+efx_vi_dma_unmap_pages(struct efx_vi_state *vih,
+		       struct efx_vi_dma_map_state *dmh);
+extern void
+efx_vi_dma_unmap_addrs(struct efx_vi_state *vih,
+		       struct efx_vi_dma_map_state *dmh);
+
+/*!
+ * Retrieve the buffer address of the mapping
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param dmh The handle to identify the buffer mapping
+ * \return The buffer address on success, or zero on failure
+ */
+extern unsigned
+efx_vi_dma_get_map_addr(struct efx_vi_state *vih,
+			struct efx_vi_dma_map_state *dmh);
+
+/**************************************************************************
+ * efx_vi filter functions
+ **************************************************************************/
+
+#define EFX_VI_STATIC_FILTERS 32
+
+/*! Handle to refer to a filter instance */
+struct filter_resource_t;
+
+/*!
+ * Allocate and add a filter
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param protocol The protocol of the new filter: UDP or TCP
+ * \param ip_addr_be32 The local ip address of the filter
+ * \param port_le16 The local port of the filter
+ * \param fh_out Set on success to be a handle to refer to this filter
+ * \return Zero on success, non-zero on failure.
+ */
+extern int
+efx_vi_filter(struct efx_vi_state *vih, int protocol, unsigned ip_addr_be32,
+	      int port_le16, struct filter_resource_t **fh_out);
+
+/*!
+ * Remove a filter and free resources associated with it
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param fh The handle to identify the filter
+ * \return Zero on success, non-zero on failure
+ */
+extern int
+efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh);
+
+/**************************************************************************
+ * efx_vi hw resources types and functions
+ **************************************************************************/
+
+/*!
Constants for the type field in efx_vi_hw_resource */ +#define EFX_VI_HW_RESOURCE_TXDMAQ 0x0 /* PFN of TX DMA Q */ +#define EFX_VI_HW_RESOURCE_RXDMAQ 0x1 /* PFN of RX DMA Q */ +#define EFX_VI_HW_RESOURCE_EVQTIMER 0x4 /* Address of event q timer */ + +/* Address of event q pointer (EF1) */ +#define EFX_VI_HW_RESOURCE_EVQPTR 0x5 +/* Address of register pointer (Falcon A) */ +#define EFX_VI_HW_RESOURCE_EVQRPTR 0x6 +/* Offset of register pointer (Falcon B) */ +#define EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET 0x7 +/* Address of mem KVA */ +#define EFX_VI_HW_RESOURCE_EVQMEMKVA 0x8 +/* PFN of doorbell page (Falcon) */ +#define EFX_VI_HW_RESOURCE_BELLPAGE 0x9 + +/*! How large an array to allocate for the get_() functions - smaller + than the total number of constants as some are mutually exclusive */ +#define EFX_VI_HW_RESOURCE_MAXSIZE 0x7 + +/*! Constants for the mem_type field in efx_vi_hw_resource */ +#define EFX_VI_HW_RESOURCE_IOBUFFER 0 /* Host memory */ +#define EFX_VI_HW_RESOURCE_PERIPHERAL 1 /* Card memory/registers */ + +/*! + * Data structure providing information on a hardware resource mapping + */ +struct efx_vi_hw_resource { + u8 type; /*!< What this resource represents */ + u8 mem_type; /*!< What type of memory is it in, eg, + * host or iomem */ + u8 more_to_follow; /*!< Is this part of a multi-region resource */ + u32 length; /*!< Length of the resource in bytes */ + unsigned long address; /*!< Address of this resource */ +}; + +/*! + * Metadata concerning the list of hardware resource mappings + */ +struct efx_vi_hw_resource_metadata { + int evq_order; + int evq_offs; + int evq_capacity; + int instance; + unsigned rx_capacity; + unsigned tx_capacity; + int nic_arch; + int nic_revision; + char nic_variant; +}; + +/*! + * Obtain a list of hardware resource mappings, using virtual addresses + * + * \param vih The handle to identify the efx_vi + * \param mdata Pointer to a structure to receive the metadata + * \param hw_res_array An array to receive the list of hardware resources + * \param length The length of hw_res_array. Updated on success to contain + * the number of entries in the supplied array that were used. + * \return Zero on success, non-zero on failure + */ +extern int +efx_vi_hw_resource_get_virt(struct efx_vi_state *vih, + struct efx_vi_hw_resource_metadata *mdata, + struct efx_vi_hw_resource *hw_res_array, + int *length); + +/*! + * Obtain a list of hardware resource mappings, using physical addresses + * + * \param vih The handle to identify the efx_vi + * \param mdata Pointer to a structure to receive the metadata + * \param hw_res_array An array to receive the list of hardware resources + * \param length The length of hw_res_array. Updated on success to contain + * the number of entries in the supplied array that were used. + * \return Zero on success, non-zero on failure + */ +extern int +efx_vi_hw_resource_get_phys(struct efx_vi_state *vih, + struct efx_vi_hw_resource_metadata *mdata, + struct efx_vi_hw_resource *hw_res_array, + int *length); + +#endif /* __CI_DRIVER_RESOURCE_EFX_VI_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware.h @@ -0,0 +1,188 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides EtherFabric NIC hardware interface. 
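An illustrative call of the resource query declared above, sized with EFX_VI_HW_RESOURCE_MAXSIZE as the header directs (the function name is not from the source; vih is assumed to come from efx_vi_alloc()):

static int example_query(struct efx_vi_state *vih)
{
	struct efx_vi_hw_resource_metadata mdata;
	struct efx_vi_hw_resource res[EFX_VI_HW_RESOURCE_MAXSIZE];
	int n = EFX_VI_HW_RESOURCE_MAXSIZE;
	int rc;

	rc = efx_vi_hw_resource_get_virt(vih, &mdata, res, &n);
	/* On success, n now holds how many entries were actually filled. */
	return rc;
}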
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_DRIVER_EFAB_HARDWARE_H__
+#define __CI_DRIVER_EFAB_HARDWARE_H__
+
+#include "ci/driver/efab/hardware/workarounds.h"
+#include
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Common EtherFabric definitions
+ *
+ *---------------------------------------------------------------------------*/
+
+#include
+#include
+#include
+
+/*----------------------------------------------------------------------------
+ *
+ * EtherFabric variants
+ *
+ *---------------------------------------------------------------------------*/
+
+#include
+
+/*----------------------------------------------------------------------------
+ *
+ * EtherFabric Portable Hardware Layer defines
+ *
+ *---------------------------------------------------------------------------*/
+
+ /*-------------- Initialisation ------------ */
+#define efhw_nic_close_hardware(nic) \
+	((nic)->efhw_func->close_hardware(nic))
+
+#define efhw_nic_init_hardware(nic, ev_handlers, mac_addr, non_irq_evq) \
+	((nic)->efhw_func->init_hardware((nic), (ev_handlers), (mac_addr), \
+					 (non_irq_evq)))
+
+/*-------------- Interrupt support  ------------ */
+/** Handle interrupt.  Return 0 if not handled, 1 if handled.
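The wrappers defined here are plain indirection through the per-NIC efhw_func operations table, so hardware-independent code never names Falcon directly. A hypothetical caller (not from the patch) looks like this:

static void example_shutdown(struct efhw_nic *nic)
{
	/* Each macro expands to (nic)->efhw_func->op(nic, ...). */
	efhw_nic_interrupt_disable(nic);
	efhw_nic_close_hardware(nic);
}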
*/ +#define efhw_nic_interrupt(nic) \ + ((nic)->efhw_func->interrupt(nic)) + +#define efhw_nic_interrupt_enable(nic) \ + ((nic)->efhw_func->interrupt_enable(nic)) + +#define efhw_nic_interrupt_disable(nic) \ + ((nic)->efhw_func->interrupt_disable(nic)) + +#define efhw_nic_set_interrupt_moderation(nic, evq, val) \ + ((nic)->efhw_func->set_interrupt_moderation(nic, evq, val)) + +/*-------------- Event support ------------ */ + +#define efhw_nic_event_queue_enable(nic, evq, size, q_base, buf_base, \ + interrupting) \ + ((nic)->efhw_func->event_queue_enable((nic), (evq), (size), (q_base), \ + (buf_base), (interrupting))) + +#define efhw_nic_event_queue_disable(nic, evq, timer_only) \ + ((nic)->efhw_func->event_queue_disable(nic, evq, timer_only)) + +#define efhw_nic_wakeup_request(nic, q_base, index, evq) \ + ((nic)->efhw_func->wakeup_request(nic, q_base, index, evq)) + +#define efhw_nic_sw_event(nic, data, ev) \ + ((nic)->efhw_func->sw_event(nic, data, ev)) + +/*-------------- Filter support ------------ */ +#define efhw_nic_ipfilter_set(nic, type, index, dmaq, \ + saddr, sport, daddr, dport) \ + ((nic)->efhw_func->ipfilter_set(nic, type, index, dmaq, \ + saddr, sport, daddr, dport)) + +#define efhw_nic_ipfilter_clear(nic, index) \ + ((nic)->efhw_func->ipfilter_clear(nic, index)) + +/*-------------- DMA support ------------ */ +#define efhw_nic_dmaq_tx_q_init(nic, dmaq, evq, owner, tag, \ + dmaq_size, index, flags) \ + ((nic)->efhw_func->dmaq_tx_q_init(nic, dmaq, evq, owner, tag, \ + dmaq_size, index, flags)) + +#define efhw_nic_dmaq_rx_q_init(nic, dmaq, evq, owner, tag, \ + dmaq_size, index, flags) \ + ((nic)->efhw_func->dmaq_rx_q_init(nic, dmaq, evq, owner, tag, \ + dmaq_size, index, flags)) + +#define efhw_nic_dmaq_tx_q_disable(nic, dmaq) \ + ((nic)->efhw_func->dmaq_tx_q_disable(nic, dmaq)) + +#define efhw_nic_dmaq_rx_q_disable(nic, dmaq) \ + ((nic)->efhw_func->dmaq_rx_q_disable(nic, dmaq)) + +#define efhw_nic_flush_tx_dma_channel(nic, dmaq) \ + ((nic)->efhw_func->flush_tx_dma_channel(nic, dmaq)) + +#define efhw_nic_flush_rx_dma_channel(nic, dmaq) \ + ((nic)->efhw_func->flush_rx_dma_channel(nic, dmaq)) + +/*-------------- MAC Low level interface ---- */ +#define efhw_gmac_get_mac_addr(nic) \ + ((nic)->gmac->get_mac_addr((nic)->gmac)) + +/*-------------- Buffer table -------------- */ +#define efhw_nic_buffer_table_set(nic, addr, bufsz, region, \ + own_id, buf_id) \ + ((nic)->efhw_func->buffer_table_set(nic, addr, bufsz, region, \ + own_id, buf_id)) + +#define efhw_nic_buffer_table_set_n(nic, buf_id, addr, bufsz, \ + region, n_pages, own_id) \ + ((nic)->efhw_func->buffer_table_set_n(nic, buf_id, addr, bufsz, \ + region, n_pages, own_id)) + +#define efhw_nic_buffer_table_clear(nic, id, num) \ + ((nic)->efhw_func->buffer_table_clear(nic, id, num)) + +#define efhw_nic_buffer_table_commit(nic) \ + ((nic)->efhw_func->buffer_table_commit(nic)) + +/*-------------- New filter API ------------ */ +#define efhw_nic_filter_set(nic, spec, index_out) \ + ((nic)->efhw_func->filter_set(nic, spec, index_out)) + +#define efhw_nic_filter_clear(nic, type, index_out) \ + ((nic)->efhw_func->filter_clear(nic, type, index_out)) + + +/* --- DMA --- */ +#define EFHW_DMA_ADDRMASK (0xffffffffffffffffULL) + +/* --- Buffers --- */ +#define EFHW_BUFFER_ADDR FALCON_BUFFER_4K_ADDR +#define EFHW_BUFFER_PAGE FALCON_BUFFER_4K_PAGE +#define EFHW_BUFFER_OFF FALCON_BUFFER_4K_OFF + +/* --- Filters --- */ +#define EFHW_IP_FILTER_NUM FALCON_FILTER_TBL_NUM + +#define EFHW_MAX_PAGE_SIZE FALCON_MAX_PAGE_SIZE + +#if PAGE_SIZE <= 
EFHW_MAX_PAGE_SIZE +#define EFHW_NIC_PAGE_SIZE PAGE_SIZE +#else +#define EFHW_NIC_PAGE_SIZE EFHW_MAX_PAGE_SIZE +#endif +#define EFHW_NIC_PAGE_MASK (~(EFHW_NIC_PAGE_SIZE-1)) + +#endif /* __CI_DRIVER_EFAB_HARDWARE_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/workarounds.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/workarounds.h @@ -0,0 +1,67 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides workaround settings for EtherFabric NICs. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_DRIVER_EFAB_WORKAROUNDS_H__ +#define __CI_DRIVER_EFAB_WORKAROUNDS_H__ + +/*---------------------------------------------------------------------------- + * + * Hardware workarounds which have global scope + * + *---------------------------------------------------------------------------*/ + +#if defined(__CI_HARDWARE_CONFIG_FALCON_B0__) +/*------------------------------- B0 ---------------------------------------*/ + +#define BUG2175_WORKAROUND 0 /* TX event batching for dual port operation. + This removes the effect (dup TX events) + of the fix + (TX event per packet + batch events) */ +#define BUG5302_WORKAROUND 0 /* unstick TX DMAQ after out-of-range wr ptr */ +#define BUG5762_WORKAROUND 0 /* Set all queues to jumbo mode */ +#define BUG5391_WORKAROUND 0 /* Misaligned TX can't span 512-byte boundary */ +#define BUG7916_WORKAROUND 0 /* RX flush gets lost */ + +#else +/*------------------------------- A0/A1 ------------------------------------*/ + +#define BUG2175_WORKAROUND 1 /* TX event batching for dual port operation. 
+ This removes the effect (dup TX events) + of the fix + (TX event per packet + batch events) */ +#define BUG5302_WORKAROUND 1 /* unstick TX DMAQ after out-of-range wr ptr */ +#define BUG5762_WORKAROUND 1 /* Set all queues to jumbo mode */ +#define BUG5391_WORKAROUND 1 /* Misaligned TX can't span 512-byte boundary */ +#define BUG7916_WORKAROUND 1 /* RX flush gets lost */ + +#endif /* B0/A01 */ + +#endif /* __CI_DRIVER_EFAB_WORKAROUNDS_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon.h @@ -0,0 +1,422 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides EtherFabric NIC - EFXXXX (aka Falcon) specific + * definitions. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_DRIVER_EFAB_HARDWARE_FALCON_H__ +#define __CI_DRIVER_EFAB_HARDWARE_FALCON_H__ + +/*---------------------------------------------------------------------------- + * Compile options + *---------------------------------------------------------------------------*/ + +/* Falcon has an 8K maximum page size. 
*/ +#define FALCON_MAX_PAGE_SIZE EFHW_8K + +/* include the register definitions */ +#include +#include +#include +#include + +#define FALCON_DMA_TX_DESC_BYTES 8 +#define FALCON_DMA_RX_PHYS_DESC_BYTES 8 +#define FALCON_DMA_RX_BUF_DESC_BYTES 4 + + +/* ---- efhw_event_t helpers --- */ + +#ifndef EFHW_IS_LITTLE_ENDIAN +#error This needs lots of cpu_to_le64s() in +#endif + +/*!\ TODO look at whether there is an efficiency gain to be had by + treating the event codes to 32bit masks as is done for EF1 + + These masks apply to the full 64 bits of the event to extract the + event code - followed by the common event codes to expect + */ +#define __FALCON_OPEN_MASK(WIDTH) ((((uint64_t)1) << (WIDTH)) - 1) +#define FALCON_EVENT_CODE_MASK \ + (__FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN) +#define FALCON_EVENT_EV_Q_ID_MASK \ + (__FALCON_OPEN_MASK(DRIVER_EV_EVQ_ID_WIDTH) << DRIVER_EV_EVQ_ID_LBN) +#define FALCON_EVENT_TX_FLUSH_Q_ID_MASK \ + (__FALCON_OPEN_MASK(DRIVER_EV_TX_DESCQ_ID_WIDTH) << \ + DRIVER_EV_TX_DESCQ_ID_LBN) +#define FALCON_EVENT_RX_FLUSH_Q_ID_MASK \ + (__FALCON_OPEN_MASK(DRIVER_EV_RX_DESCQ_ID_WIDTH) << \ + DRIVER_EV_RX_DESCQ_ID_LBN) +#define FALCON_EVENT_DRV_SUBCODE_MASK \ + (__FALCON_OPEN_MASK(DRIVER_EV_SUB_CODE_WIDTH) << \ + DRIVER_EV_SUB_CODE_LBN) + +#define FALCON_EVENT_FMT "[ev:%x:%08x:%08x]" +#define FALCON_EVENT_PRI_ARG(e) \ + ((unsigned)(((e).u64 & FALCON_EVENT_CODE_MASK) >> EV_CODE_LBN)), \ + ((unsigned)((e).u64 >> 32)), ((unsigned)((e).u64 & 0xFFFFFFFF)) + +#define FALCON_EVENT_CODE(evp) ((evp)->u64 & FALCON_EVENT_CODE_MASK) +#define FALCON_EVENT_WAKE_EVQ_ID(evp) \ + (((evp)->u64 & FALCON_EVENT_EV_Q_ID_MASK) >> DRIVER_EV_EVQ_ID_LBN) +#define FALCON_EVENT_TX_FLUSH_Q_ID(evp) \ + (((evp)->u64 & FALCON_EVENT_TX_FLUSH_Q_ID_MASK) >> \ + DRIVER_EV_TX_DESCQ_ID_LBN) +#define FALCON_EVENT_RX_FLUSH_Q_ID(evp) \ + (((evp)->u64 & FALCON_EVENT_RX_FLUSH_Q_ID_MASK) >> \ + DRIVER_EV_RX_DESCQ_ID_LBN) +#define FALCON_EVENT_DRIVER_SUBCODE(evp) \ + (((evp)->u64 & FALCON_EVENT_DRV_SUBCODE_MASK) >> \ + DRIVER_EV_SUB_CODE_LBN) + +#define FALCON_EVENT_CODE_CHAR ((uint64_t)DRIVER_EV_DECODE << EV_CODE_LBN) +#define FALCON_EVENT_CODE_SW ((uint64_t)DRV_GEN_EV_DECODE << EV_CODE_LBN) + + +/* so this is the size in bytes of an awful lot of things */ +#define FALCON_REGISTER128 (16) + +/* we define some unique dummy values as a debug aid */ +#ifdef _WIN32 +#define FALCON_ATOMIC_BASE 0xdeadbeef00000000ui64 +#else +#define FALCON_ATOMIC_BASE 0xdeadbeef00000000ULL +#endif +#define FALCON_ATOMIC_UPD_REG (FALCON_ATOMIC_BASE | 0x1) +#define FALCON_ATOMIC_PTR_TBL_REG (FALCON_ATOMIC_BASE | 0x2) +#define FALCON_ATOMIC_SRPM_UDP_EVQ_REG (FALCON_ATOMIC_BASE | 0x3) +#define FALCON_ATOMIC_RX_FLUSH_DESCQ (FALCON_ATOMIC_BASE | 0x4) +#define FALCON_ATOMIC_TX_FLUSH_DESCQ (FALCON_ATOMIC_BASE | 0x5) +#define FALCON_ATOMIC_INT_EN_REG (FALCON_ATOMIC_BASE | 0x6) +#define FALCON_ATOMIC_TIMER_CMD_REG (FALCON_ATOMIC_BASE | 0x7) +#define FALCON_ATOMIC_PACE_REG (FALCON_ATOMIC_BASE | 0x8) +#define FALCON_ATOMIC_INT_ACK_REG (FALCON_ATOMIC_BASE | 0x9) +/* XXX It crashed with odd value in FALCON_ATOMIC_INT_ADR_REG */ +#define FALCON_ATOMIC_INT_ADR_REG (FALCON_ATOMIC_BASE | 0xa) + +/*---------------------------------------------------------------------------- + * + * PCI control blocks for Falcon - + * (P) primary is for NET + * (S) secondary is for CHAR + * + *---------------------------------------------------------------------------*/ + +#define FALCON_P_CTR_AP_BAR 2 +#define FALCON_S_CTR_AP_BAR 0 +#define FALCON_S_DEVID 0x6703 + + 
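A small illustrative decoder built from the masks above (the function names are not from the source; events are the 64-bit values described by FALCON_EVENT_FMT):

static inline int example_is_driver_ev(uint64_t ev)
{
	/* Compare the 4-bit code field against the driver-event code. */
	return (ev & FALCON_EVENT_CODE_MASK) == FALCON_EVENT_CODE_CHAR;
}

static inline unsigned example_wakeup_evq(uint64_t ev)
{
	/* Only meaningful for wakeup/timeout driver events. */
	return (unsigned)((ev & FALCON_EVENT_EV_Q_ID_MASK) >>
			  DRIVER_EV_EVQ_ID_LBN);
}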
+/*----------------------------------------------------------------------------
+ *
+ * Falcon constants
+ *
+ *---------------------------------------------------------------------------*/
+
+/* Note: the following constants have moved to values in struct efhw_nic:
+ * FALCON_EVQ_TBL_NUM  ->  nic->num_evqs
+ * FALCON_DMAQ_NUM     ->  nic->num_dmaqs
+ * FALCON_TIMERS_NUM   ->  nic->num_times
+ * These replacement constants are used as sanity checks in assertions in
+ * certain functions that don't have access to struct efhw_nic.
+ */
+#define FALCON_DMAQ_NUM_SANITY          (EFHW_4K)
+#define FALCON_EVQ_TBL_NUM_SANITY       (EFHW_4K)
+#define FALCON_TIMERS_NUM_SANITY        (EFHW_4K)
+
+/* This value is an upper limit on the total number of filter table
+ * entries.  The actual size of the filter table is determined at runtime,
+ * as it can vary.
+ */
+#define FALCON_FILTER_TBL_NUM           (EFHW_8K)
+
+/* max number of buffers which can be pushed before committing */
+#define FALCON_BUFFER_UPD_MAX           (128)
+
+/* We can tell Falcon to write its RX buffers in 32-byte quanta,
+   and since we pad packets 2 bytes to the right we can't use
+   a full page (not unless we use jumbo mode for all queues)
+
+   NOTE: tests/nic/dma.c assumes that the value here is the real NIC
+   value, so we explicitly round it down to the nearest 32 bytes */
+
+/* #define FALCON_RX_USR_BUF_SIZE round_down(4096-2,32) */
+#define FALCON_RX_USR_BUF_SIZE          4064
+
+#define FALCON_EVQ_RPTR_REG_P0          0x400
+
+/*----------------------------------------------------------------------------
+ *
+ * Falcon requires user-space descriptor pushes to be:
+ *    dword[0-2]; wiob(); dword[3]
+ *
+ * Driver register access must be locked against other threads from
+ * the same driver but can be in any order: i.e. dword[0-3]; wiob()
+ *
+ * The following helpers ensure that valid dword orderings are exercised
+ *
+ *---------------------------------------------------------------------------*/
+
+/* A union to allow writing 64-bit values as 32-bit values, without
+ * hitting the compiler's aliasing rules.  We hope the compiler optimises
+ * away the copies anyway. */
+union __u64to32 {
+	uint64_t u64;
+	struct {
+#ifdef EFHW_IS_LITTLE_ENDIAN
+		uint32_t a;
+		uint32_t b;
+#else
+		uint32_t b;
+		uint32_t a;
+#endif
+	} s;
+};
+
+static inline void
+falcon_write_ddd_d(volatile char __iomem *kva,
+		   uint32_t d0, uint32_t d1, uint32_t d2, uint32_t d3)
+{
+	writel(d0, kva + 0);
+	writel(d1, kva + 4);
+	writel(d2, kva + 8);
+	mmiowb();
+	writel(d3, kva + 12);
+}
+
+static inline void falcon_write_q(volatile char __iomem *kva, uint64_t q)
+{
+	union __u64to32 u;
+	u.u64 = q;
+
+	writel(u.s.a, kva);
+	mmiowb();
+	writel(u.s.b, kva + 4);
+}
+
+static inline void falcon_read_q(volatile char __iomem *addr, uint64_t *q0)
+{
+	/* It is essential that we read dword0 first, so that
+	 * the shadow register is updated with the latest value
+	 * and we get a self-consistent value.
+ */ + union __u64to32 u; + u.s.a = readl(addr); + rmb(); + u.s.b = readl(addr + 4); + + *q0 = u.u64; +} + +static inline void +falcon_write_qq(volatile char __iomem *kva, uint64_t q0, uint64_t q1) +{ + writeq(q0, kva + 0); + falcon_write_q(kva + 8, q1); +} + +static inline void +falcon_read_qq(volatile char __iomem *addr, uint64_t *q0, uint64_t *q1) +{ + falcon_read_q(addr, q0); + *q1 = readq(addr + 8); +} + + + +/*---------------------------------------------------------------------------- + * + * Buffer virtual addresses (4K buffers) + * + *---------------------------------------------------------------------------*/ + +/* Form a buffer virtual address from buffer ID and offset. If the offset +** is larger than the buffer size, then the buffer indexed will be +** calculated appropriately. It is the responsibility of the caller to +** ensure that they have valid buffers programmed at that address. +*/ +#define FALCON_VADDR_8K_S (13) +#define FALCON_VADDR_4K_S (12) +#define FALCON_VADDR_M 0xfffff /* post shift mask */ + +#define FALCON_BUFFER_8K_ADDR(id, off) (((id) << FALCON_VADDR_8K_S) + (off)) +#define FALCON_BUFFER_8K_PAGE(vaddr) \ + (((vaddr) >> FALCON_VADDR_8K_S) & FALCON_VADDR_M) +#define FALCON_BUFFER_8K_OFF(vaddr) \ + ((vaddr) & __FALCON_MASK32(FALCON_VADDR_8K_S)) + +#define FALCON_BUFFER_4K_ADDR(id, off) (((id) << FALCON_VADDR_4K_S) + (off)) +#define FALCON_BUFFER_4K_PAGE(vaddr) \ + (((vaddr) >> FALCON_VADDR_4K_S) & FALCON_VADDR_M) +#define FALCON_BUFFER_4K_OFF(vaddr) \ + ((vaddr) & __FALCON_MASK32(FALCON_VADDR_4K_S)) + +/*---------------------------------------------------------------------------- + * + * Timer helpers + * + *---------------------------------------------------------------------------*/ + +static inline int falcon_timer_page_addr(uint idx) +{ + + EFHW_ASSERT(TIMER_CMD_REG_KER_OFST == + (TIMER_CMD_REG_PAGE4_OFST - 4 * EFHW_8K)); + + EFHW_ASSERT(idx < FALCON_TIMERS_NUM_SANITY); + + if (idx < 4) + return TIMER_CMD_REG_KER_OFST + (idx * EFHW_8K); + else if (idx < 1024) + return TIMER_CMD_REG_PAGE4_OFST + ((idx - 4) * EFHW_8K); + else + return TIMER_CMD_REG_PAGE123K_OFST + ((idx - 1024) * EFHW_8K); +} + +#define FALCON_TIMER_PAGE_MASK (EFHW_8K-1) + +static inline int falcon_timer_page_offset(uint idx) +{ + return falcon_timer_page_addr(idx) & FALCON_TIMER_PAGE_MASK; +} + +/*---------------------------------------------------------------------------- + * + * DMA Queue helpers + * + *---------------------------------------------------------------------------*/ + +/* iSCSI queue for A1; see bug 5427 for more details. */ +#define FALCON_A1_ISCSI_DMAQ 4 + +/*! returns an address within a bar of the TX DMA doorbell */ +static inline uint falcon_tx_dma_page_addr(uint dmaq_idx) +{ + uint page; + + EFHW_ASSERT((((TX_DESC_UPD_REG_PAGE123K_OFST) & (EFHW_8K - 1)) == + (((TX_DESC_UPD_REG_PAGE4_OFST) & (EFHW_8K - 1))))); + + EFHW_ASSERT(dmaq_idx < FALCON_DMAQ_NUM_SANITY); + + if (dmaq_idx < 1024) + page = TX_DESC_UPD_REG_PAGE4_OFST + ((dmaq_idx - 4) * EFHW_8K); + else + page = + TX_DESC_UPD_REG_PAGE123K_OFST + + ((dmaq_idx - 1024) * EFHW_8K); + + return page; +} + +/*! 
returns an address within a bar of the RX DMA doorbell */ +static inline uint falcon_rx_dma_page_addr(uint dmaq_idx) +{ + uint page; + + EFHW_ASSERT((((RX_DESC_UPD_REG_PAGE123K_OFST) & (EFHW_8K - 1)) == + ((RX_DESC_UPD_REG_PAGE4_OFST) & (EFHW_8K - 1)))); + + EFHW_ASSERT(dmaq_idx < FALCON_DMAQ_NUM_SANITY); + + if (dmaq_idx < 1024) + page = RX_DESC_UPD_REG_PAGE4_OFST + ((dmaq_idx - 4) * EFHW_8K); + else + page = + RX_DESC_UPD_REG_PAGE123K_OFST + + ((dmaq_idx - 1024) * EFHW_8K); + + return page; +} + +/*! "page"=NIC-dependent register set size */ +#define FALCON_DMA_PAGE_MASK (EFHW_8K-1) + +/*! returns an address within a bar of the start of the "page" + containing the TX DMA doorbell */ +static inline int falcon_tx_dma_page_base(uint dma_idx) +{ + return falcon_tx_dma_page_addr(dma_idx) & ~FALCON_DMA_PAGE_MASK; +} + +/*! returns an address within a bar of the start of the "page" + containing the RX DMA doorbell */ +static inline int falcon_rx_dma_page_base(uint dma_idx) +{ + return falcon_rx_dma_page_addr(dma_idx) & ~FALCON_DMA_PAGE_MASK; +} + +/*! returns an offset within a "page" of the TX DMA doorbell */ +static inline int falcon_tx_dma_page_offset(uint dma_idx) +{ + return falcon_tx_dma_page_addr(dma_idx) & FALCON_DMA_PAGE_MASK; +} + +/*! returns an offset within a "page" of the RX DMA doorbell */ +static inline int falcon_rx_dma_page_offset(uint dma_idx) +{ + return falcon_rx_dma_page_addr(dma_idx) & FALCON_DMA_PAGE_MASK; +} + +/*---------------------------------------------------------------------------- + * + * Events + * + *---------------------------------------------------------------------------*/ + +/* Falcon nails down the event queue mappings */ +#define FALCON_EVQ_KERNEL0 (0) /* hardwired for net driver */ +#define FALCON_EVQ_CHAR (4) /* char driver's event queue */ + +/* reserved by the drivers */ +#define FALCON_EVQ_TBL_RESERVED (8) + +/* default DMA-Q sizes */ +#define FALCON_DMA_Q_DEFAULT_TX_SIZE 512 + +#define FALCON_DMA_Q_DEFAULT_RX_SIZE 512 + +#define FALCON_DMA_Q_DEFAULT_MMAP \ + (FALCON_DMA_Q_DEFAULT_TX_SIZE * (FALCON_DMA_TX_DESC_BYTES * 2)) + +/*---------------------------------------------------------------------------- + * + * DEBUG - Analyser trigger + * + *---------------------------------------------------------------------------*/ + +static inline void +falcon_deadbeef(volatile char __iomem *efhw_kva, unsigned what) +{ + writel(what, efhw_kva + 0x300); + mmiowb(); +} +#endif /* __CI_DRIVER_EFAB_HARDWARE_FALCON_H__ */ +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/common.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/common.h @@ -0,0 +1,68 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides EtherFabric NIC hardware interface common + * definitions. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
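Working the 4K buffer-address macros from falcon.h above through an example (the values and the function name are arbitrary): the PAGE and OFF macros simply invert the shift-and-add done by ADDR.

static void example_buffer_vaddr(void)
{
	unsigned vaddr = FALCON_BUFFER_4K_ADDR(7, 0x123);	/* == 0x7123 */
	unsigned page = FALCON_BUFFER_4K_PAGE(vaddr);		/* == 7 */
	unsigned off = FALCON_BUFFER_4K_OFF(vaddr);		/* == 0x123 */

	(void)page;
	(void)off;
}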
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_DRIVER_EFAB_HARDWARE_COMMON_H__ +#define __CI_DRIVER_EFAB_HARDWARE_COMMON_H__ + +/*---------------------------------------------------------------------------- + * + * EtherFabric constants + * + *---------------------------------------------------------------------------*/ + +#define EFHW_1K 0x00000400u +#define EFHW_2K 0x00000800u +#define EFHW_4K 0x00001000u +#define EFHW_8K 0x00002000u +#define EFHW_16K 0x00004000u +#define EFHW_32K 0x00008000u +#define EFHW_64K 0x00010000u +#define EFHW_128K 0x00020000u +#define EFHW_256K 0x00040000u +#define EFHW_512K 0x00080000u +#define EFHW_1M 0x00100000u +#define EFHW_2M 0x00200000u +#define EFHW_4M 0x00400000u +#define EFHW_8M 0x00800000u +#define EFHW_16M 0x01000000u +#define EFHW_32M 0x02000000u +#define EFHW_48M 0x03000000u +#define EFHW_64M 0x04000000u +#define EFHW_128M 0x08000000u +#define EFHW_256M 0x10000000u +#define EFHW_512M 0x20000000u +#define EFHW_1G 0x40000000u +#define EFHW_2G 0x80000000u +#define EFHW_4G 0x100000000ULL +#define EFHW_8G 0x200000000ULL + +#endif /* __CI_DRIVER_EFAB_HARDWARE_COMMON_H__ */ --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_desc.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_desc.h @@ -0,0 +1,75 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides EtherFabric NIC - EFXXXX (aka Falcon) descriptor + * definitions. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*************---- Descriptors C Headers ----*************/ +/* Receive Kernel IP Descriptor */ + #define RX_KER_BUF_SIZE_LBN 48 + #define RX_KER_BUF_SIZE_WIDTH 14 + #define RX_KER_BUF_REGION_LBN 46 + #define RX_KER_BUF_REGION_WIDTH 2 + #define RX_KER_BUF_REGION0_DECODE 0 + #define RX_KER_BUF_REGION1_DECODE 1 + #define RX_KER_BUF_REGION2_DECODE 2 + #define RX_KER_BUF_REGION3_DECODE 3 + #define RX_KER_BUF_ADR_LBN 0 + #define RX_KER_BUF_ADR_WIDTH 46 +/* Receive User IP Descriptor */ + #define RX_USR_2BYTE_OFS_LBN 20 + #define RX_USR_2BYTE_OFS_WIDTH 12 + #define RX_USR_BUF_ID_LBN 0 + #define RX_USR_BUF_ID_WIDTH 20 +/* Transmit Kernel IP Descriptor */ + #define TX_KER_PORT_LBN 63 + #define TX_KER_PORT_WIDTH 1 + #define TX_KER_CONT_LBN 62 + #define TX_KER_CONT_WIDTH 1 + #define TX_KER_BYTE_CNT_LBN 48 + #define TX_KER_BYTE_CNT_WIDTH 14 + #define TX_KER_BUF_REGION_LBN 46 + #define TX_KER_BUF_REGION_WIDTH 2 + #define TX_KER_BUF_REGION0_DECODE 0 + #define TX_KER_BUF_REGION1_DECODE 1 + #define TX_KER_BUF_REGION2_DECODE 2 + #define TX_KER_BUF_REGION3_DECODE 3 + #define TX_KER_BUF_ADR_LBN 0 + #define TX_KER_BUF_ADR_WIDTH 46 +/* Transmit User IP Descriptor */ + #define TX_USR_PORT_LBN 47 + #define TX_USR_PORT_WIDTH 1 + #define TX_USR_CONT_LBN 46 + #define TX_USR_CONT_WIDTH 1 + #define TX_USR_BYTE_CNT_LBN 33 + #define TX_USR_BYTE_CNT_WIDTH 13 + #define TX_USR_BUF_ID_LBN 13 + #define TX_USR_BUF_ID_WIDTH 20 + #define TX_USR_BYTE_OFS_LBN 0 + #define TX_USR_BYTE_OFS_WIDTH 13 --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_intr_vec.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_intr_vec.h @@ -0,0 +1,44 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides EtherFabric NIC - EFXXXX (aka Falcon) interrupt + * vector definitions. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*************---- Interrupt Vector Format C Header ----*************/ +#define DW0_OFST 0x0 /* Double-word 0: Event queue FIFO interrupts */ + #define EVQ_FIFO_HF_LBN 1 + #define EVQ_FIFO_HF_WIDTH 1 + #define EVQ_FIFO_AF_LBN 0 + #define EVQ_FIFO_AF_WIDTH 1 +#define DW1_OFST 0x4 /* Double-word 1: Interrupt indicator */ + #define INT_FLAG_LBN 0 + #define INT_FLAG_WIDTH 1 +#define DW2_OFST 0x8 /* Double-word 2: Fatal interrupts */ + #define FATAL_INT_LBN 0 + #define FATAL_INT_WIDTH 1 --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_event.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_event.h @@ -0,0 +1,155 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides EtherFabric NIC - EFXXXX (aka Falcon) event + * definitions. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*************---- Events Format C Header ----*************/ +/*************---- Event entry ----*************/ + #define EV_CODE_LBN 60 + #define EV_CODE_WIDTH 4 + #define RX_IP_EV_DECODE 0 + #define TX_IP_EV_DECODE 2 + #define DRIVER_EV_DECODE 5 + #define GLOBAL_EV_DECODE 6 + #define DRV_GEN_EV_DECODE 7 + #define EV_DATA_LBN 0 + #define EV_DATA_WIDTH 60 +/******---- Receive IP events for both Kernel & User event queues ----******/ + #define RX_EV_PKT_OK_LBN 56 + #define RX_EV_PKT_OK_WIDTH 1 + #define RX_EV_BUF_OWNER_ID_ERR_LBN 54 + #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1 + #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52 + #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1 + #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51 + #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1 + #define RX_EV_ETH_CRC_ERR_LBN 50 + #define RX_EV_ETH_CRC_ERR_WIDTH 1 + #define RX_EV_FRM_TRUNC_LBN 49 + #define RX_EV_FRM_TRUNC_WIDTH 1 + #define RX_EV_DRIB_NIB_LBN 48 + #define RX_EV_DRIB_NIB_WIDTH 1 + #define RX_EV_TOBE_DISC_LBN 47 + #define RX_EV_TOBE_DISC_WIDTH 1 + #define RX_EV_PKT_TYPE_LBN 44 + #define RX_EV_PKT_TYPE_WIDTH 3 + #define RX_EV_PKT_TYPE_ETH_DECODE 0 + #define RX_EV_PKT_TYPE_LLC_DECODE 1 + #define RX_EV_PKT_TYPE_JUMBO_DECODE 2 + #define RX_EV_PKT_TYPE_VLAN_DECODE 3 + #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4 + #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5 + #define RX_EV_HDR_TYPE_LBN 42 + #define RX_EV_HDR_TYPE_WIDTH 2 + #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0 + #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1 + #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2 + #define RX_EV_HDR_TYPE_NON_IP_DECODE 3 + #define RX_EV_DESC_Q_EMPTY_LBN 41 + #define RX_EV_DESC_Q_EMPTY_WIDTH 1 + #define RX_EV_MCAST_HASH_MATCH_LBN 40 + #define RX_EV_MCAST_HASH_MATCH_WIDTH 1 + #define RX_EV_MCAST_PKT_LBN 39 + #define RX_EV_MCAST_PKT_WIDTH 1 + #define RX_EV_Q_LABEL_LBN 32 + #define RX_EV_Q_LABEL_WIDTH 5 + #define RX_JUMBO_CONT_LBN 31 + #define RX_JUMBO_CONT_WIDTH 1 + #define RX_SOP_LBN 15 + #define RX_SOP_WIDTH 1 + #define RX_PORT_LBN 30 + #define RX_PORT_WIDTH 1 + #define RX_EV_BYTE_CNT_LBN 16 + #define RX_EV_BYTE_CNT_WIDTH 14 + #define RX_iSCSI_PKT_OK_LBN 14 + #define RX_iSCSI_PKT_OK_WIDTH 1 + #define RX_ISCSI_DDIG_ERR_LBN 13 + #define RX_ISCSI_DDIG_ERR_WIDTH 1 + #define RX_ISCSI_HDIG_ERR_LBN 12 + #define RX_ISCSI_HDIG_ERR_WIDTH 1 + #define RX_EV_DESC_PTR_LBN 0 + #define RX_EV_DESC_PTR_WIDTH 12 +/******---- Transmit IP events for both Kernel & User event queues ----******/ + #define TX_EV_PKT_ERR_LBN 38 + #define TX_EV_PKT_ERR_WIDTH 1 + #define TX_EV_PKT_TOO_BIG_LBN 37 + #define TX_EV_PKT_TOO_BIG_WIDTH 1 + #define TX_EV_Q_LABEL_LBN 32 + #define TX_EV_Q_LABEL_WIDTH 5 + #define TX_EV_PORT_LBN 16 + #define TX_EV_PORT_WIDTH 1 + #define TX_EV_WQ_FF_FULL_LBN 15 + #define TX_EV_WQ_FF_FULL_WIDTH 1 + #define TX_EV_BUF_OWNER_ID_ERR_LBN 14 + #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1 + #define TX_EV_COMP_LBN 12 + #define TX_EV_COMP_WIDTH 1 + #define TX_EV_DESC_PTR_LBN 0 + #define TX_EV_DESC_PTR_WIDTH 12 +/*************---- Char or Kernel driver events ----*************/ + #define DRIVER_EV_SUB_CODE_LBN 56 + #define DRIVER_EV_SUB_CODE_WIDTH 4 + #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0 + #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1 + #define EVQ_INIT_DONE_EV_DECODE 0x2 + #define EVQ_NOT_EN_EV_DECODE 0x3 + 
#define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4 + #define SRM_UPD_DONE_EV_DECODE 0x5 + #define WAKE_UP_EV_DECODE 0x6 + #define TX_PKT_NON_TCP_UDP_DECODE 0x9 + #define TIMER_EV_DECODE 0xA + #define RX_DSC_ERROR_EV_DECODE 0xE + #define DRIVER_EV_TX_DESCQ_ID_LBN 0 + #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12 + #define DRIVER_EV_RX_DESCQ_ID_LBN 0 + #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12 + #define DRIVER_EV_EVQ_ID_LBN 0 + #define DRIVER_EV_EVQ_ID_WIDTH 12 + #define DRIVER_TMR_ID_LBN 0 + #define DRIVER_TMR_ID_WIDTH 12 + #define DRIVER_EV_SRM_UPD_LBN 0 + #define DRIVER_EV_SRM_UPD_WIDTH 2 + #define SRM_CLR_EV_DECODE 0 + #define SRM_UPD_EV_DECODE 1 + #define SRM_ILLCLR_EV_DECODE 2 +/********---- Global events. Sent to both event queue 0 and 4. ----********/ + #define XFP_PHY_INTR_LBN 10 + #define XFP_PHY_INTR_WIDTH 1 + #define XG_PHY_INTR_LBN 9 + #define XG_PHY_INTR_WIDTH 1 + #define G_PHY1_INTR_LBN 8 + #define G_PHY1_INTR_WIDTH 1 + #define G_PHY0_INTR_LBN 7 + #define G_PHY0_INTR_WIDTH 1 +/*************---- Driver generated events ----*************/ + #define DRV_GEN_EV_CODE_LBN 60 + #define DRV_GEN_EV_CODE_WIDTH 4 + #define DRV_GEN_EV_DATA_LBN 0 + #define DRV_GEN_EV_DATA_WIDTH 60 --- linux-ec2-2.6.31.orig/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_core.h +++ linux-ec2-2.6.31/drivers/net/sfc/sfc_resource/ci/driver/efab/hardware/falcon/falcon_core.h @@ -0,0 +1,1147 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides EtherFabric NIC - EFXXXX (aka Falcon) core register + * definitions. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#define FALCON_EXTENDED_P_BAR 1 + +/*************---- Bus Interface Unit Registers C Header ----*************/ +#define IOM_IND_ADR_REG_OFST 0x0 /* IO-mapped indirect access address + register */ + #define IOM_AUTO_ADR_INC_EN_LBN 16 + #define IOM_AUTO_ADR_INC_EN_WIDTH 1 + #define IOM_IND_ADR_LBN 0 + #define IOM_IND_ADR_WIDTH 16 +#define IOM_IND_DAT_REG_OFST 0x4 /* IO-mapped indirect access data register */ + #define IOM_IND_DAT_LBN 0 + #define IOM_IND_DAT_WIDTH 32 +#define ADR_REGION_REG_KER_OFST 0x0 /* Address region register */ +#define ADR_REGION_REG_OFST 0x0 /* Address region register */ + #define ADR_REGION3_LBN 96 + #define ADR_REGION3_WIDTH 18 + #define ADR_REGION2_LBN 64 + #define ADR_REGION2_WIDTH 18 + #define ADR_REGION1_LBN 32 + #define ADR_REGION1_WIDTH 18 + #define ADR_REGION0_LBN 0 + #define ADR_REGION0_WIDTH 18 +#define INT_EN_REG_KER_OFST 0x10 /* Kernel driver Interrupt enable register */ + #define KER_INT_CHAR_LBN 4 + #define KER_INT_CHAR_WIDTH 1 + #define KER_INT_KER_LBN 3 + #define KER_INT_KER_WIDTH 1 + #define ILL_ADR_ERR_INT_EN_KER_LBN 2 + #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1 + #define SRM_PERR_INT_EN_KER_LBN 1 + #define SRM_PERR_INT_EN_KER_WIDTH 1 + #define DRV_INT_EN_KER_LBN 0 + #define DRV_INT_EN_KER_WIDTH 1 +#define INT_EN_REG_CHAR_OFST 0x20 /* Char Driver interrupt enable register */ + #define CHAR_INT_CHAR_LBN 4 + #define CHAR_INT_CHAR_WIDTH 1 + #define CHAR_INT_KER_LBN 3 + #define CHAR_INT_KER_WIDTH 1 + #define ILL_ADR_ERR_INT_EN_CHAR_LBN 2 + #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1 + #define SRM_PERR_INT_EN_CHAR_LBN 1 + #define SRM_PERR_INT_EN_CHAR_WIDTH 1 + #define DRV_INT_EN_CHAR_LBN 0 + #define DRV_INT_EN_CHAR_WIDTH 1 +#define INT_ADR_REG_KER_OFST 0x30 /* Interrupt host address for Kernel driver */ + #define INT_ADR_KER_LBN 0 + #define INT_ADR_KER_WIDTH 64 + #define DRV_INT_KER_LBN 32 + #define DRV_INT_KER_WIDTH 1 + #define EV_FF_HALF_INT_KER_LBN 3 + #define EV_FF_HALF_INT_KER_WIDTH 1 + #define EV_FF_FULL_INT_KER_LBN 2 + #define EV_FF_FULL_INT_KER_WIDTH 1 + #define ILL_ADR_ERR_INT_KER_LBN 1 + #define ILL_ADR_ERR_INT_KER_WIDTH 1 + #define SRAM_PERR_INT_KER_LBN 0 + #define SRAM_PERR_INT_KER_WIDTH 1 +#define INT_ADR_REG_CHAR_OFST 0x40 /* Interrupt host address for Char driver */ + #define INT_ADR_CHAR_LBN 0 + #define INT_ADR_CHAR_WIDTH 64 + #define DRV_INT_CHAR_LBN 32 + #define DRV_INT_CHAR_WIDTH 1 + #define EV_FF_HALF_INT_CHAR_LBN 3 + #define EV_FF_HALF_INT_CHAR_WIDTH 1 + #define EV_FF_FULL_INT_CHAR_LBN 2 + #define EV_FF_FULL_INT_CHAR_WIDTH 1 + #define ILL_ADR_ERR_INT_CHAR_LBN 1 + #define ILL_ADR_ERR_INT_CHAR_WIDTH 1 + #define SRAM_PERR_INT_CHAR_LBN 0 + #define SRAM_PERR_INT_CHAR_WIDTH 1 +#define INT_ISR0_B0_OFST 0x90 /* B0 only */ +#define INT_ISR1_B0_OFST 0xA0 +#define INT_ACK_REG_KER_A1_OFST 0x50 /* Kernel interrupt acknowledge register */ + #define RESERVED_LBN 0 + #define RESERVED_WIDTH 32 +#define INT_ACK_REG_CHAR_A1_OFST 0x60 /* CHAR interrupt acknowledge register */ + #define RESERVED_LBN 0 + #define RESERVED_WIDTH 32 +/*************---- Global CSR Registers C Header ----*************/ +#define NIC_STAT_REG_KER_OFST 0x200 /* ASIC strap status register */ +#define NIC_STAT_REG_OFST 0x200 /* ASIC strap status register */ + #define ONCHIP_SRAM_LBN 16 + 
#define ONCHIP_SRAM_WIDTH 0 + #define STRAP_PINS_LBN 0 + #define STRAP_PINS_WIDTH 3 +#define GPIO_CTL_REG_KER_OFST 0x210 /* GPIO control register */ +#define GPIO_CTL_REG_OFST 0x210 /* GPIO control register */ + #define GPIO_OEN_LBN 24 + #define GPIO_OEN_WIDTH 4 + #define GPIO_OUT_LBN 16 + #define GPIO_OUT_WIDTH 4 + #define GPIO_IN_LBN 8 + #define GPIO_IN_WIDTH 4 + #define GPIO_PWRUP_VALUE_LBN 0 + #define GPIO_PWRUP_VALUE_WIDTH 4 +#define GLB_CTL_REG_KER_OFST 0x220 /* Global control register */ +#define GLB_CTL_REG_OFST 0x220 /* Global control register */ + #define SWRST_LBN 0 + #define SWRST_WIDTH 1 +#define FATAL_INTR_REG_KER_OFST 0x230 /* Fatal interrupt register for Kernel */ + #define PCI_BUSERR_INT_KER_EN_LBN 43 + #define PCI_BUSERR_INT_KER_EN_WIDTH 1 + #define SRAM_OOB_INT_KER_EN_LBN 42 + #define SRAM_OOB_INT_KER_EN_WIDTH 1 + #define BUFID_OOB_INT_KER_EN_LBN 41 + #define BUFID_OOB_INT_KER_EN_WIDTH 1 + #define MEM_PERR_INT_KER_EN_LBN 40 + #define MEM_PERR_INT_KER_EN_WIDTH 1 + #define RBUF_OWN_INT_KER_EN_LBN 39 + #define RBUF_OWN_INT_KER_EN_WIDTH 1 + #define TBUF_OWN_INT_KER_EN_LBN 38 + #define TBUF_OWN_INT_KER_EN_WIDTH 1 + #define RDESCQ_OWN_INT_KER_EN_LBN 37 + #define RDESCQ_OWN_INT_KER_EN_WIDTH 1 + #define TDESCQ_OWN_INT_KER_EN_LBN 36 + #define TDESCQ_OWN_INT_KER_EN_WIDTH 1 + #define EVQ_OWN_INT_KER_EN_LBN 35 + #define EVQ_OWN_INT_KER_EN_WIDTH 1 + #define EVFF_OFLO_INT_KER_EN_LBN 34 + #define EVFF_OFLO_INT_KER_EN_WIDTH 1 + #define ILL_ADR_INT_KER_EN_LBN 33 + #define ILL_ADR_INT_KER_EN_WIDTH 1 + #define SRM_PERR_INT_KER_EN_LBN 32 + #define SRM_PERR_INT_KER_EN_WIDTH 1 + #define PCI_BUSERR_INT_KER_LBN 11 + #define PCI_BUSERR_INT_KER_WIDTH 1 + #define SRAM_OOB_INT_KER_LBN 10 + #define SRAM_OOB_INT_KER_WIDTH 1 + #define BUFID_OOB_INT_KER_LBN 9 + #define BUFID_OOB_INT_KER_WIDTH 1 + #define MEM_PERR_INT_KER_LBN 8 + #define MEM_PERR_INT_KER_WIDTH 1 + #define RBUF_OWN_INT_KER_LBN 7 + #define RBUF_OWN_INT_KER_WIDTH 1 + #define TBUF_OWN_INT_KER_LBN 6 + #define TBUF_OWN_INT_KER_WIDTH 1 + #define RDESCQ_OWN_INT_KER_LBN 5 + #define RDESCQ_OWN_INT_KER_WIDTH 1 + #define TDESCQ_OWN_INT_KER_LBN 4 + #define TDESCQ_OWN_INT_KER_WIDTH 1 + #define EVQ_OWN_INT_KER_LBN 3 + #define EVQ_OWN_INT_KER_WIDTH 1 + #define EVFF_OFLO_INT_KER_LBN 2 + #define EVFF_OFLO_INT_KER_WIDTH 1 + #define ILL_ADR_INT_KER_LBN 1 + #define ILL_ADR_INT_KER_WIDTH 1 + #define SRM_PERR_INT_KER_LBN 0 + #define SRM_PERR_INT_KER_WIDTH 1 +#define FATAL_INTR_REG_OFST 0x240 /* Fatal interrupt register for Char */ + #define PCI_BUSERR_INT_CHAR_EN_LBN 43 + #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1 + #define SRAM_OOB_INT_CHAR_EN_LBN 42 + #define SRAM_OOB_INT_CHAR_EN_WIDTH 1 + #define BUFID_OOB_INT_CHAR_EN_LBN 41 + #define BUFID_OOB_INT_CHAR_EN_WIDTH 1 + #define MEM_PERR_INT_CHAR_EN_LBN 40 + #define MEM_PERR_INT_CHAR_EN_WIDTH 1 + #define RBUF_OWN_INT_CHAR_EN_LBN 39 + #define RBUF_OWN_INT_CHAR_EN_WIDTH 1 + #define TBUF_OWN_INT_CHAR_EN_LBN 38 + #define TBUF_OWN_INT_CHAR_EN_WIDTH 1 + #define RDESCQ_OWN_INT_CHAR_EN_LBN 37 + #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1 + #define TDESCQ_OWN_INT_CHAR_EN_LBN 36 + #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1 + #define EVQ_OWN_INT_CHAR_EN_LBN 35 + #define EVQ_OWN_INT_CHAR_EN_WIDTH 1 + #define EVFF_OFLO_INT_CHAR_EN_LBN 34 + #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1 + #define ILL_ADR_INT_CHAR_EN_LBN 33 + #define ILL_ADR_INT_CHAR_EN_WIDTH 1 + #define SRM_PERR_INT_CHAR_EN_LBN 32 + #define SRM_PERR_INT_CHAR_EN_WIDTH 1 + #define FATAL_INTR_REG_EN_BITS 0xffffffffffffffffULL + #define PCI_BUSERR_INT_CHAR_LBN 11 + #define 
PCI_BUSERR_INT_CHAR_WIDTH 1 + #define SRAM_OOB_INT_CHAR_LBN 10 + #define SRAM_OOB_INT_CHAR_WIDTH 1 + #define BUFID_OOB_INT_CHAR_LBN 9 + #define BUFID_OOB_INT_CHAR_WIDTH 1 + #define MEM_PERR_INT_CHAR_LBN 8 + #define MEM_PERR_INT_CHAR_WIDTH 1 + #define RBUF_OWN_INT_CHAR_LBN 7 + #define RBUF_OWN_INT_CHAR_WIDTH 1 + #define TBUF_OWN_INT_CHAR_LBN 6 + #define TBUF_OWN_INT_CHAR_WIDTH 1 + #define RDESCQ_OWN_INT_CHAR_LBN 5 + #define RDESCQ_OWN_INT_CHAR_WIDTH 1 + #define TDESCQ_OWN_INT_CHAR_LBN 4 + #define TDESCQ_OWN_INT_CHAR_WIDTH 1 + #define EVQ_OWN_INT_CHAR_LBN 3 + #define EVQ_OWN_INT_CHAR_WIDTH 1 + #define EVFF_OFLO_INT_CHAR_LBN 2 + #define EVFF_OFLO_INT_CHAR_WIDTH 1 + #define ILL_ADR_INT_CHAR_LBN 1 + #define ILL_ADR_INT_CHAR_WIDTH 1 + #define SRM_PERR_INT_CHAR_LBN 0 + #define SRM_PERR_INT_CHAR_WIDTH 1 +#define DP_CTRL_REG_OFST 0x250 /* Datapath control register */ + #define FLS_EVQ_ID_LBN 0 + #define FLS_EVQ_ID_WIDTH 12 +#define MEM_STAT_REG_KER_OFST 0x260 /* Memory status register */ +#define MEM_STAT_REG_OFST 0x260 /* Memory status register */ + #define MEM_PERR_VEC_LBN 53 + #define MEM_PERR_VEC_WIDTH 38 + #define MBIST_CORR_LBN 38 + #define MBIST_CORR_WIDTH 15 + #define MBIST_ERR_LBN 0 + #define MBIST_ERR_WIDTH 38 +#define DEBUG_REG_KER_OFST 0x270 /* Debug register */ +#define DEBUG_REG_OFST 0x270 /* Debug register */ + #define DEBUG_BLK_SEL2_LBN 47 + #define DEBUG_BLK_SEL2_WIDTH 3 + #define DEBUG_BLK_SEL1_LBN 44 + #define DEBUG_BLK_SEL1_WIDTH 3 + #define DEBUG_BLK_SEL0_LBN 41 + #define DEBUG_BLK_SEL0_WIDTH 3 + #define MISC_DEBUG_ADDR_LBN 36 + #define MISC_DEBUG_ADDR_WIDTH 5 + #define SERDES_DEBUG_ADDR_LBN 31 + #define SERDES_DEBUG_ADDR_WIDTH 5 + #define EM_DEBUG_ADDR_LBN 26 + #define EM_DEBUG_ADDR_WIDTH 5 + #define SR_DEBUG_ADDR_LBN 21 + #define SR_DEBUG_ADDR_WIDTH 5 + #define EV_DEBUG_ADDR_LBN 16 + #define EV_DEBUG_ADDR_WIDTH 5 + #define RX_DEBUG_ADDR_LBN 11 + #define RX_DEBUG_ADDR_WIDTH 5 + #define TX_DEBUG_ADDR_LBN 6 + #define TX_DEBUG_ADDR_WIDTH 5 + #define BIU_DEBUG_ADDR_LBN 1 + #define BIU_DEBUG_ADDR_WIDTH 5 + #define DEBUG_EN_LBN 0 + #define DEBUG_EN_WIDTH 1 +#define DRIVER_REG0_KER_OFST 0x280 /* Driver scratch register 0 */ +#define DRIVER_REG0_OFST 0x280 /* Driver scratch register 0 */ + #define DRIVER_DW0_LBN 0 + #define DRIVER_DW0_WIDTH 32 +#define DRIVER_REG1_KER_OFST 0x290 /* Driver scratch register 1 */ +#define DRIVER_REG1_OFST 0x290 /* Driver scratch register 1 */ + #define DRIVER_DW1_LBN 0 + #define DRIVER_DW1_WIDTH 32 +#define DRIVER_REG2_KER_OFST 0x2A0 /* Driver scratch register 2 */ +#define DRIVER_REG2_OFST 0x2A0 /* Driver scratch register 2 */ + #define DRIVER_DW2_LBN 0 + #define DRIVER_DW2_WIDTH 32 +#define DRIVER_REG3_KER_OFST 0x2B0 /* Driver scratch register 3 */ +#define DRIVER_REG3_OFST 0x2B0 /* Driver scratch register 3 */ + #define DRIVER_DW3_LBN 0 + #define DRIVER_DW3_WIDTH 32 +#define DRIVER_REG4_KER_OFST 0x2C0 /* Driver scratch register 4 */ +#define DRIVER_REG4_OFST 0x2C0 /* Driver scratch register 4 */ + #define DRIVER_DW4_LBN 0 + #define DRIVER_DW4_WIDTH 32 +#define DRIVER_REG5_KER_OFST 0x2D0 /* Driver scratch register 5 */ +#define DRIVER_REG5_OFST 0x2D0 /* Driver scratch register 5 */ + #define DRIVER_DW5_LBN 0 + #define DRIVER_DW5_WIDTH 32 +#define DRIVER_REG6_KER_OFST 0x2E0 /* Driver scratch register 6 */ +#define DRIVER_REG6_OFST 0x2E0 /* Driver scratch register 6 */ + #define DRIVER_DW6_LBN 0 + #define DRIVER_DW6_WIDTH 32 +#define DRIVER_REG7_KER_OFST 0x2F0 /* Driver scratch register 7 */ +#define DRIVER_REG7_OFST 0x2F0 /* Driver scratch register 7 */ 
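Throughout these headers each register field is described by an *_LBN ("lowest bit number") and *_WIDTH pair within a little-endian register image. A minimal sketch of how a driver might use such a pair, shown here with the DRIVER_DW0 macros defined above; the EXAMPLE_* names are illustrative helpers, not part of this driver:

#define EXAMPLE_FIELD_MASK(width) ((((unsigned long long)1) << (width)) - 1)

/* Compose a 32-bit payload into the DRIVER_DW0 field of scratch register 0. */
static inline unsigned long long example_driver_reg0_image(unsigned int dw0)
{
	return ((unsigned long long)dw0 & EXAMPLE_FIELD_MASK(DRIVER_DW0_WIDTH))
	       << DRIVER_DW0_LBN;
}

Reading a field back is the mirror operation: shift right by the LBN and mask with the same width-derived mask.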
+ #define DRIVER_DW7_LBN 0
+ #define DRIVER_DW7_WIDTH 32
+#define ALTERA_BUILD_REG_KER_OFST 0x300 /* Altera build register */
+#define ALTERA_BUILD_REG_OFST 0x300 /* Altera build register */
+ #define ALTERA_BUILD_VER_LBN 0
+ #define ALTERA_BUILD_VER_WIDTH 32
+
+/* so called CSR spare register
+ - contains separate parity enable bits for the various internal memory
+ blocks */
+#define MEM_PARITY_ERR_EN_REG_KER 0x310
+#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64
+#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38
+#define MEM_PARITY_TX_DATA_EN_LBN 72
+#define MEM_PARITY_TX_DATA_EN_WIDTH 2
+
+/*************---- Event & Timer Module Registers C Header ----*************/
+
+#if FALCON_EXTENDED_P_BAR
+#define EVQ_RPTR_REG_KER_OFST 0x11B00 /* Event queue read pointer register */
+#else
+#define EVQ_RPTR_REG_KER_OFST 0x1B00 /* Event queue read pointer register */
+#endif
+
+#define EVQ_RPTR_REG_OFST 0xFA0000 /* Event queue read pointer register
+ array. */
+ #define EVQ_RPTR_LBN 0
+ #define EVQ_RPTR_WIDTH 15
+
+#if FALCON_EXTENDED_P_BAR
+#define EVQ_PTR_TBL_KER_OFST 0x11A00 /* Event queue pointer table for kernel
+ access */
+#else
+#define EVQ_PTR_TBL_KER_OFST 0x1A00 /* Event queue pointer table for kernel
+ access */
+#endif
+
+#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 /* Event queue pointer table for char
+ direct access */
+ #define EVQ_WKUP_OR_INT_EN_LBN 39
+ #define EVQ_WKUP_OR_INT_EN_WIDTH 1
+ #define EVQ_NXT_WPTR_LBN 24
+ #define EVQ_NXT_WPTR_WIDTH 15
+ #define EVQ_EN_LBN 23
+ #define EVQ_EN_WIDTH 1
+ #define EVQ_SIZE_LBN 20
+ #define EVQ_SIZE_WIDTH 3
+ #define EVQ_BUF_BASE_ID_LBN 0
+ #define EVQ_BUF_BASE_ID_WIDTH 20
+#define TIMER_CMD_REG_KER_OFST 0x420 /* Timer table for kernel access.
+ Page-mapped */
+#define TIMER_CMD_REG_PAGE4_OFST 0x8420 /* Timer table for user-level access.
+ Page-mapped. For lowest 1K queues.
+ */
+#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 /* Timer table for user-level
+ access. Page-mapped.
+ For upper 3K queues.
*/ +#define TIMER_TBL_OFST 0xF70000 /* Timer table for char driver direct access */ + #define TIMER_MODE_LBN 12 + #define TIMER_MODE_WIDTH 2 + #define TIMER_VAL_LBN 0 + #define TIMER_VAL_WIDTH 12 + #define TIMER_MODE_INT_HLDOFF 2 + #define EVQ_BUF_SIZE_LBN 0 + #define EVQ_BUF_SIZE_WIDTH 1 +#define DRV_EV_REG_KER_OFST 0x440 /* Driver generated event register */ +#define DRV_EV_REG_OFST 0x440 /* Driver generated event register */ + #define DRV_EV_QID_LBN 64 + #define DRV_EV_QID_WIDTH 12 + #define DRV_EV_DATA_LBN 0 + #define DRV_EV_DATA_WIDTH 64 +#define EVQ_CTL_REG_KER_OFST 0x450 /* Event queue control register */ +#define EVQ_CTL_REG_OFST 0x450 /* Event queue control register */ + #define RX_EVQ_WAKEUP_MASK_B0_LBN 15 + #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6 + #define EVQ_OWNERR_CTL_LBN 14 + #define EVQ_OWNERR_CTL_WIDTH 1 + #define EVQ_FIFO_AF_TH_LBN 8 + #define EVQ_FIFO_AF_TH_WIDTH 6 + #define EVQ_FIFO_NOTAF_TH_LBN 0 + #define EVQ_FIFO_NOTAF_TH_WIDTH 6 +/*************---- SRAM Module Registers C Header ----*************/ +#define BUF_TBL_CFG_REG_KER_OFST 0x600 /* Buffer table configuration register */ +#define BUF_TBL_CFG_REG_OFST 0x600 /* Buffer table configuration register */ + #define BUF_TBL_MODE_LBN 3 + #define BUF_TBL_MODE_WIDTH 1 +#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 /* SRAM receive descriptor cache + configuration register */ +#define SRM_RX_DC_CFG_REG_OFST 0x610 /* SRAM receive descriptor cache + configuration register */ + #define SRM_RX_DC_BASE_ADR_LBN 0 + #define SRM_RX_DC_BASE_ADR_WIDTH 21 +#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 /* SRAM transmit descriptor cache + configuration register */ +#define SRM_TX_DC_CFG_REG_OFST 0x620 /* SRAM transmit descriptor cache + configuration register */ + #define SRM_TX_DC_BASE_ADR_LBN 0 + #define SRM_TX_DC_BASE_ADR_WIDTH 21 +#define SRM_CFG_REG_KER_OFST 0x630 /* SRAM configuration register */ +#define SRM_CFG_REG_OFST 0x630 /* SRAM configuration register */ + #define SRAM_OOB_ADR_INTEN_LBN 5 + #define SRAM_OOB_ADR_INTEN_WIDTH 1 + #define SRAM_OOB_BUF_INTEN_LBN 4 + #define SRAM_OOB_BUF_INTEN_WIDTH 1 + #define SRAM_BT_INIT_EN_LBN 3 + #define SRAM_BT_INIT_EN_WIDTH 1 + #define SRM_NUM_BANK_LBN 2 + #define SRM_NUM_BANK_WIDTH 1 + #define SRM_BANK_SIZE_LBN 0 + #define SRM_BANK_SIZE_WIDTH 2 +#define BUF_TBL_UPD_REG_KER_OFST 0x650 /* Buffer table update register */ +#define BUF_TBL_UPD_REG_OFST 0x650 /* Buffer table update register */ + #define BUF_UPD_CMD_LBN 63 + #define BUF_UPD_CMD_WIDTH 1 + #define BUF_CLR_CMD_LBN 62 + #define BUF_CLR_CMD_WIDTH 1 + #define BUF_CLR_END_ID_LBN 32 + #define BUF_CLR_END_ID_WIDTH 20 + #define BUF_CLR_START_ID_LBN 0 + #define BUF_CLR_START_ID_WIDTH 20 +#define SRM_UPD_EVQ_REG_KER_OFST 0x660 /* Buffer table update register */ +#define SRM_UPD_EVQ_REG_OFST 0x660 /* Buffer table update register */ + #define SRM_UPD_EVQ_ID_LBN 0 + #define SRM_UPD_EVQ_ID_WIDTH 12 +#define SRAM_PARITY_REG_KER_OFST 0x670 /* SRAM parity register. */ +#define SRAM_PARITY_REG_OFST 0x670 /* SRAM parity register. 
*/ + #define FORCE_SRAM_PERR_LBN 0 + #define FORCE_SRAM_PERR_WIDTH 1 + +#if FALCON_EXTENDED_P_BAR +#define BUF_HALF_TBL_KER_OFST 0x18000 /* Buffer table in half buffer table + mode direct access by kernel driver */ +#else +#define BUF_HALF_TBL_KER_OFST 0x8000 /* Buffer table in half buffer table + mode direct access by kernel driver */ +#endif + + +#define BUF_HALF_TBL_OFST 0x800000 /* Buffer table in half buffer table mode + direct access by char driver */ + #define BUF_ADR_HBUF_ODD_LBN 44 + #define BUF_ADR_HBUF_ODD_WIDTH 20 + #define BUF_OWNER_ID_HBUF_ODD_LBN 32 + #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12 + #define BUF_ADR_HBUF_EVEN_LBN 12 + #define BUF_ADR_HBUF_EVEN_WIDTH 20 + #define BUF_OWNER_ID_HBUF_EVEN_LBN 0 + #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12 + + +#if FALCON_EXTENDED_P_BAR +#define BUF_FULL_TBL_KER_OFST 0x18000 /* Buffer table in full buffer table + mode direct access by kernel driver */ +#else +#define BUF_FULL_TBL_KER_OFST 0x8000 /* Buffer table in full buffer table mode + direct access by kernel driver */ +#endif + + + + +#define BUF_FULL_TBL_OFST 0x800000 /* Buffer table in full buffer table mode + direct access by char driver */ + #define IP_DAT_BUF_SIZE_LBN 50 + #define IP_DAT_BUF_SIZE_WIDTH 1 + #define BUF_ADR_REGION_LBN 48 + #define BUF_ADR_REGION_WIDTH 2 + #define BUF_ADR_FBUF_LBN 14 + #define BUF_ADR_FBUF_WIDTH 34 + #define BUF_OWNER_ID_FBUF_LBN 0 + #define BUF_OWNER_ID_FBUF_WIDTH 14 +#define SRM_DBG_REG_OFST 0x3000000 /* SRAM debug access */ + #define SRM_DBG_LBN 0 + #define SRM_DBG_WIDTH 64 +/*************---- RX Datapath Registers C Header ----*************/ + +#define RX_CFG_REG_KER_OFST 0x800 /* Receive configuration register */ +#define RX_CFG_REG_OFST 0x800 /* Receive configuration register */ + +#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029) +# if !defined(FALCON_128K_RXFIFO) +# define FALCON_128K_RXFIFO +# endif +#endif + +#if defined(FALCON_128K_RXFIFO) + +/* new for B0 */ + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 + #define RX_INGR_EN_B0_LBN 47 + #define RX_INGR_EN_B0_WIDTH 1 + #define RX_TOEP_IPV4_B0_LBN 46 + #define RX_TOEP_IPV4_B0_WIDTH 1 + #define RX_HASH_ALG_B0_LBN 45 + #define RX_HASH_ALG_B0_WIDTH 1 + #define RX_HASH_INSERT_HDR_B0_LBN 44 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 +/* moved for B0 */ + #define RX_DESC_PUSH_EN_B0_LBN 43 + #define RX_DESC_PUSH_EN_B0_WIDTH 1 + #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */ + #define RX_RDW_PATCH_EN_WIDTH 1 + #define RX_PCI_BURST_SIZE_B0_LBN 39 + #define RX_PCI_BURST_SIZE_B0_WIDTH 3 + #define RX_OWNERR_CTL_B0_LBN 38 + #define RX_OWNERR_CTL_B0_WIDTH 1 + #define RX_XON_TX_TH_B0_LBN 33 + #define RX_XON_TX_TH_B0_WIDTH 5 + #define RX_XOFF_TX_TH_B0_LBN 28 + #define RX_XOFF_TX_TH_B0_WIDTH 5 + #define RX_USR_BUF_SIZE_B0_LBN 19 + #define RX_USR_BUF_SIZE_B0_WIDTH 9 + #define RX_XON_MAC_TH_B0_LBN 10 + #define RX_XON_MAC_TH_B0_WIDTH 9 + #define RX_XOFF_MAC_TH_B0_LBN 1 + #define RX_XOFF_MAC_TH_B0_WIDTH 9 + #define RX_XOFF_MAC_EN_B0_LBN 0 + #define RX_XOFF_MAC_EN_B0_WIDTH 1 + +#elif !defined(FALCON_PRE_02020029) +/* new for B0 */ + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 + #define RX_INGR_EN_B0_LBN 45 + #define RX_INGR_EN_B0_WIDTH 1 + #define RX_TOEP_IPV4_B0_LBN 44 + #define RX_TOEP_IPV4_B0_WIDTH 1 + #define RX_HASH_ALG_B0_LBN 43 + #define RX_HASH_ALG_B0_WIDTH 41 + #define RX_HASH_INSERT_HDR_B0_LBN 42 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 +/* moved for B0 */ + #define RX_DESC_PUSH_EN_B0_LBN 41 + #define 
RX_DESC_PUSH_EN_B0_WIDTH 1 + #define RX_PCI_BURST_SIZE_B0_LBN 37 + #define RX_PCI_BURST_SIZE_B0_WIDTH 3 + #define RX_OWNERR_CTL_B0_LBN 36 + #define RX_OWNERR_CTL_B0_WIDTH 1 + #define RX_XON_TX_TH_B0_LBN 31 + #define RX_XON_TX_TH_B0_WIDTH 5 + #define RX_XOFF_TX_TH_B0_LBN 26 + #define RX_XOFF_TX_TH_B0_WIDTH 5 + #define RX_USR_BUF_SIZE_B0_LBN 17 + #define RX_USR_BUF_SIZE_B0_WIDTH 9 + #define RX_XON_MAC_TH_B0_LBN 9 + #define RX_XON_MAC_TH_B0_WIDTH 8 + #define RX_XOFF_MAC_TH_B0_LBN 1 + #define RX_XOFF_MAC_TH_B0_WIDTH 8 + #define RX_XOFF_MAC_EN_B0_LBN 0 + #define RX_XOFF_MAC_EN_B0_WIDTH 1 + +#else +/* new for B0 */ + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 + #define RX_INGR_EN_B0_LBN 43 + #define RX_INGR_EN_B0_WIDTH 1 + #define RX_TOEP_IPV4_B0_LBN 42 + #define RX_TOEP_IPV4_B0_WIDTH 1 + #define RX_HASH_ALG_B0_LBN 41 + #define RX_HASH_ALG_B0_WIDTH 41 + #define RX_HASH_INSERT_HDR_B0_LBN 40 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 +/* moved for B0 */ + #define RX_DESC_PUSH_EN_B0_LBN 35 + #define RX_DESC_PUSH_EN_B0_WIDTH 1 + #define RX_PCI_BURST_SIZE_B0_LBN 35 + #define RX_PCI_BURST_SIZE_B0_WIDTH 2 + #define RX_OWNERR_CTL_B0_LBN 34 + #define RX_OWNERR_CTL_B0_WIDTH 1 + #define RX_XON_TX_TH_B0_LBN 29 + #define RX_XON_TX_TH_B0_WIDTH 5 + #define RX_XOFF_TX_TH_B0_LBN 24 + #define RX_XOFF_TX_TH_B0_WIDTH 5 + #define RX_USR_BUF_SIZE_B0_LBN 15 + #define RX_USR_BUF_SIZE_B0_WIDTH 9 + #define RX_XON_MAC_TH_B0_LBN 8 + #define RX_XON_MAC_TH_B0_WIDTH 7 + #define RX_XOFF_MAC_TH_B0_LBN 1 + #define RX_XOFF_MAC_TH_B0_WIDTH 7 + #define RX_XOFF_MAC_EN_B0_LBN 0 + #define RX_XOFF_MAC_EN_B0_WIDTH 1 + +#endif + +/* A0/A1 */ + #define RX_PUSH_EN_A1_LBN 35 + #define RX_PUSH_EN_A1_WIDTH 1 + #define RX_PCI_BURST_SIZE_A1_LBN 31 + #define RX_PCI_BURST_SIZE_A1_WIDTH 3 + #define RX_OWNERR_CTL_A1_LBN 30 + #define RX_OWNERR_CTL_A1_WIDTH 1 + #define RX_XON_TX_TH_A1_LBN 25 + #define RX_XON_TX_TH_A1_WIDTH 5 + #define RX_XOFF_TX_TH_A1_LBN 20 + #define RX_XOFF_TX_TH_A1_WIDTH 5 + #define RX_USR_BUF_SIZE_A1_LBN 11 + #define RX_USR_BUF_SIZE_A1_WIDTH 9 + #define RX_XON_MAC_TH_A1_LBN 6 + #define RX_XON_MAC_TH_A1_WIDTH 5 + #define RX_XOFF_MAC_TH_A1_LBN 1 + #define RX_XOFF_MAC_TH_A1_WIDTH 5 + #define RX_XOFF_MAC_EN_A1_LBN 0 + #define RX_XOFF_MAC_EN_A1_WIDTH 1 + +#define RX_FILTER_CTL_REG_OFST 0x810 /* Receive filter control registers */ + #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40 + #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1 + #define UDP_FULL_SRCH_LIMIT_LBN 32 + #define UDP_FULL_SRCH_LIMIT_WIDTH 8 + #define NUM_KER_LBN 24 + #define NUM_KER_WIDTH 2 + #define UDP_WILD_SRCH_LIMIT_LBN 16 + #define UDP_WILD_SRCH_LIMIT_WIDTH 8 + #define TCP_WILD_SRCH_LIMIT_LBN 8 + #define TCP_WILD_SRCH_LIMIT_WIDTH 8 + #define TCP_FULL_SRCH_LIMIT_LBN 0 + #define TCP_FULL_SRCH_LIMIT_WIDTH 8 +#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 /* Receive flush descriptor queue + register */ +#define RX_FLUSH_DESCQ_REG_OFST 0x820 /* Receive flush descriptor queue + register */ + #define RX_FLUSH_DESCQ_CMD_LBN 24 + #define RX_FLUSH_DESCQ_CMD_WIDTH 1 + #define RX_FLUSH_EVQ_ID_LBN 12 + #define RX_FLUSH_EVQ_ID_WIDTH 12 + #define RX_FLUSH_DESCQ_LBN 0 + #define RX_FLUSH_DESCQ_WIDTH 12 +#define RX_DESC_UPD_REG_KER_OFST 0x830 /* Kernel receive descriptor update + register. Page-mapped */ +#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 /* Char & user receive descriptor + update register. Page-mapped. + For lowest 1K queues. */ +#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 /* Char & user receive + descriptor update register. + Page-mapped. 
For upper + 3K queues. */ + #define RX_DESC_WPTR_LBN 96 + #define RX_DESC_WPTR_WIDTH 12 + #define RX_DESC_PUSH_CMD_LBN 95 + #define RX_DESC_PUSH_CMD_WIDTH 1 + #define RX_DESC_LBN 0 + #define RX_DESC_WIDTH 64 + #define RX_KER_DESC_LBN 0 + #define RX_KER_DESC_WIDTH 64 + #define RX_USR_DESC_LBN 0 + #define RX_USR_DESC_WIDTH 32 +#define RX_DC_CFG_REG_KER_OFST 0x840 /* Receive descriptor cache + configuration register */ +#define RX_DC_CFG_REG_OFST 0x840 /* Receive descriptor cache + configuration register */ + #define RX_DC_SIZE_LBN 0 + #define RX_DC_SIZE_WIDTH 2 +#define RX_DC_PF_WM_REG_KER_OFST 0x850 /* Receive descriptor cache pre-fetch + watermark register */ +#define RX_DC_PF_WM_REG_OFST 0x850 /* Receive descriptor cache pre-fetch + watermark register */ + #define RX_DC_PF_LWM_LO_LBN 0 + #define RX_DC_PF_LWM_LO_WIDTH 6 + +#define RX_RSS_TKEY_B0_OFST 0x860 /* RSS Toeplitz hash key (B0 only) */ + +#define RX_NODESC_DROP_REG 0x880 + #define RX_NODESC_DROP_CNT_LBN 0 + #define RX_NODESC_DROP_CNT_WIDTH 16 + +#define XM_TX_CFG_REG_OFST 0x1230 + #define XM_AUTO_PAD_LBN 5 + #define XM_AUTO_PAD_WIDTH 1 + +#define RX_FILTER_TBL0_OFST 0xF00000 /* Receive filter table - even entries */ + #define RSS_EN_0_B0_LBN 110 + #define RSS_EN_0_B0_WIDTH 1 + #define SCATTER_EN_0_B0_LBN 109 + #define SCATTER_EN_0_B0_WIDTH 1 + #define TCP_UDP_0_LBN 108 + #define TCP_UDP_0_WIDTH 1 + #define RXQ_ID_0_LBN 96 + #define RXQ_ID_0_WIDTH 12 + #define DEST_IP_0_LBN 64 + #define DEST_IP_0_WIDTH 32 + #define DEST_PORT_TCP_0_LBN 48 + #define DEST_PORT_TCP_0_WIDTH 16 + #define SRC_IP_0_LBN 16 + #define SRC_IP_0_WIDTH 32 + #define SRC_TCP_DEST_UDP_0_LBN 0 + #define SRC_TCP_DEST_UDP_0_WIDTH 16 +#define RX_FILTER_TBL1_OFST 0xF00010 /* Receive filter table - odd entries */ + #define RSS_EN_1_B0_LBN 110 + #define RSS_EN_1_B0_WIDTH 1 + #define SCATTER_EN_1_B0_LBN 109 + #define SCATTER_EN_1_B0_WIDTH 1 + #define TCP_UDP_1_LBN 108 + #define TCP_UDP_1_WIDTH 1 + #define RXQ_ID_1_LBN 96 + #define RXQ_ID_1_WIDTH 12 + #define DEST_IP_1_LBN 64 + #define DEST_IP_1_WIDTH 32 + #define DEST_PORT_TCP_1_LBN 48 + #define DEST_PORT_TCP_1_WIDTH 16 + #define SRC_IP_1_LBN 16 + #define SRC_IP_1_WIDTH 32 + #define SRC_TCP_DEST_UDP_1_LBN 0 + #define SRC_TCP_DEST_UDP_1_WIDTH 16 + +#if FALCON_EXTENDED_P_BAR +#define RX_DESC_PTR_TBL_KER_OFST 0x11800 /* Receive descriptor pointer + kernel access */ +#else +#define RX_DESC_PTR_TBL_KER_OFST 0x1800 /* Receive descriptor pointer + kernel access */ +#endif + + +#define RX_DESC_PTR_TBL_OFST 0xF40000 /* Receive descriptor pointer table */ + #define RX_ISCSI_DDIG_EN_LBN 88 + #define RX_ISCSI_DDIG_EN_WIDTH 1 + #define RX_ISCSI_HDIG_EN_LBN 87 + #define RX_ISCSI_HDIG_EN_WIDTH 1 + #define RX_DESC_PREF_ACT_LBN 86 + #define RX_DESC_PREF_ACT_WIDTH 1 + #define RX_DC_HW_RPTR_LBN 80 + #define RX_DC_HW_RPTR_WIDTH 6 + #define RX_DESCQ_HW_RPTR_LBN 68 + #define RX_DESCQ_HW_RPTR_WIDTH 12 + #define RX_DESCQ_SW_WPTR_LBN 56 + #define RX_DESCQ_SW_WPTR_WIDTH 12 + #define RX_DESCQ_BUF_BASE_ID_LBN 36 + #define RX_DESCQ_BUF_BASE_ID_WIDTH 20 + #define RX_DESCQ_EVQ_ID_LBN 24 + #define RX_DESCQ_EVQ_ID_WIDTH 12 + #define RX_DESCQ_OWNER_ID_LBN 10 + #define RX_DESCQ_OWNER_ID_WIDTH 14 + #define RX_DESCQ_LABEL_LBN 5 + #define RX_DESCQ_LABEL_WIDTH 5 + #define RX_DESCQ_SIZE_LBN 3 + #define RX_DESCQ_SIZE_WIDTH 2 + #define RX_DESCQ_TYPE_LBN 2 + #define RX_DESCQ_TYPE_WIDTH 1 + #define RX_DESCQ_JUMBO_LBN 1 + #define RX_DESCQ_JUMBO_WIDTH 1 + #define RX_DESCQ_EN_LBN 0 + #define RX_DESCQ_EN_WIDTH 1 + + +#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 /* RSS 
indirection table (B0 only) */ + #define RX_RSS_INDIR_ENT_B0_LBN 0 + #define RX_RSS_INDIR_ENT_B0_WIDTH 6 + +/*************---- TX Datapath Registers C Header ----*************/ +#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 /* Transmit flush descriptor + queue register */ +#define TX_FLUSH_DESCQ_REG_OFST 0xA00 /* Transmit flush descriptor queue + register */ + #define TX_FLUSH_DESCQ_CMD_LBN 12 + #define TX_FLUSH_DESCQ_CMD_WIDTH 1 + #define TX_FLUSH_DESCQ_LBN 0 + #define TX_FLUSH_DESCQ_WIDTH 12 +#define TX_DESC_UPD_REG_KER_OFST 0xA10 /* Kernel transmit descriptor update + register. Page-mapped */ +#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 /* Char & user transmit descriptor + update register. Page-mapped */ +#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 /* Char & user transmit + descriptor update register. + Page-mapped */ + #define TX_DESC_WPTR_LBN 96 + #define TX_DESC_WPTR_WIDTH 12 + #define TX_DESC_PUSH_CMD_LBN 95 + #define TX_DESC_PUSH_CMD_WIDTH 1 + #define TX_DESC_LBN 0 + #define TX_DESC_WIDTH 95 + #define TX_KER_DESC_LBN 0 + #define TX_KER_DESC_WIDTH 64 + #define TX_USR_DESC_LBN 0 + #define TX_USR_DESC_WIDTH 64 +#define TX_DC_CFG_REG_KER_OFST 0xA20 /* Transmit descriptor cache + configuration register */ +#define TX_DC_CFG_REG_OFST 0xA20 /* Transmit descriptor cache configuration + register */ + #define TX_DC_SIZE_LBN 0 + #define TX_DC_SIZE_WIDTH 2 + +#if FALCON_EXTENDED_P_BAR +#define TX_DESC_PTR_TBL_KER_OFST 0x11900 /* Transmit descriptor pointer. */ +#else +#define TX_DESC_PTR_TBL_KER_OFST 0x1900 /* Transmit descriptor pointer. */ +#endif + + +#define TX_DESC_PTR_TBL_OFST 0xF50000 /* Transmit descriptor pointer */ + #define TX_NON_IP_DROP_DIS_B0_LBN 91 + #define TX_NON_IP_DROP_DIS_B0_WIDTH 1 + #define TX_IP_CHKSM_DIS_B0_LBN 90 + #define TX_IP_CHKSM_DIS_B0_WIDTH 1 + #define TX_TCP_CHKSM_DIS_B0_LBN 89 + #define TX_TCP_CHKSM_DIS_B0_WIDTH 1 + #define TX_DESCQ_EN_LBN 88 + #define TX_DESCQ_EN_WIDTH 1 + #define TX_ISCSI_DDIG_EN_LBN 87 + #define TX_ISCSI_DDIG_EN_WIDTH 1 + #define TX_ISCSI_HDIG_EN_LBN 86 + #define TX_ISCSI_HDIG_EN_WIDTH 1 + #define TX_DC_HW_RPTR_LBN 80 + #define TX_DC_HW_RPTR_WIDTH 6 + #define TX_DESCQ_HW_RPTR_LBN 68 + #define TX_DESCQ_HW_RPTR_WIDTH 12 + #define TX_DESCQ_SW_WPTR_LBN 56 + #define TX_DESCQ_SW_WPTR_WIDTH 12 + #define TX_DESCQ_BUF_BASE_ID_LBN 36 + #define TX_DESCQ_BUF_BASE_ID_WIDTH 20 + #define TX_DESCQ_EVQ_ID_LBN 24 + #define TX_DESCQ_EVQ_ID_WIDTH 12 + #define TX_DESCQ_OWNER_ID_LBN 10 + #define TX_DESCQ_OWNER_ID_WIDTH 14 + #define TX_DESCQ_LABEL_LBN 5 + #define TX_DESCQ_LABEL_WIDTH 5 + #define TX_DESCQ_SIZE_LBN 3 + #define TX_DESCQ_SIZE_WIDTH 2 + #define TX_DESCQ_TYPE_LBN 1 + #define TX_DESCQ_TYPE_WIDTH 2 + #define TX_DESCQ_FLUSH_LBN 0 + #define TX_DESCQ_FLUSH_WIDTH 1 +#define TX_CFG_REG_KER_OFST 0xA50 /* Transmit configuration register */ +#define TX_CFG_REG_OFST 0xA50 /* Transmit configuration register */ + #define TX_IP_ID_P1_OFS_LBN 32 + #define TX_IP_ID_P1_OFS_WIDTH 15 + #define TX_IP_ID_P0_OFS_LBN 16 + #define TX_IP_ID_P0_OFS_WIDTH 15 + #define TX_TURBO_EN_LBN 3 + #define TX_TURBO_EN_WIDTH 1 + #define TX_OWNERR_CTL_LBN 2 + #define TX_OWNERR_CTL_WIDTH 2 + #define TX_NON_IP_DROP_DIS_LBN 1 + #define TX_NON_IP_DROP_DIS_WIDTH 1 + #define TX_IP_ID_REP_EN_LBN 0 + #define TX_IP_ID_REP_EN_WIDTH 1 +#define TX_RESERVED_REG_KER_OFST 0xA80 /* Transmit configuration register */ +#define TX_RESERVED_REG_OFST 0xA80 /* Transmit configuration register */ + #define TX_CSR_PUSH_EN_LBN 89 + #define TX_CSR_PUSH_EN_WIDTH 1 + #define TX_RX_SPACER_LBN 64 + #define 
TX_RX_SPACER_WIDTH 8 + #define TX_SW_EV_EN_LBN 59 + #define TX_SW_EV_EN_WIDTH 1 + #define TX_RX_SPACER_EN_LBN 57 + #define TX_RX_SPACER_EN_WIDTH 1 + #define TX_CSR_PREF_WD_TMR_LBN 24 + #define TX_CSR_PREF_WD_TMR_WIDTH 16 + #define TX_CSR_ONLY1TAG_LBN 21 + #define TX_CSR_ONLY1TAG_WIDTH 1 + #define TX_PREF_THRESHOLD_LBN 19 + #define TX_PREF_THRESHOLD_WIDTH 2 + #define TX_ONE_PKT_PER_Q_LBN 18 + #define TX_ONE_PKT_PER_Q_WIDTH 1 + #define TX_DIS_NON_IP_EV_LBN 17 + #define TX_DIS_NON_IP_EV_WIDTH 1 + #define TX_DMA_SPACER_LBN 8 + #define TX_DMA_SPACER_WIDTH 8 + #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7 + #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1 + #define TX_TCP_DIS_A1_LBN 7 + #define TX_TCP_DIS_A1_WIDTH 1 + #define TX_IP_DIS_A1_LBN 6 + #define TX_IP_DIS_A1_WIDTH 1 + #define TX_MAX_CPL_LBN 2 + #define TX_MAX_CPL_WIDTH 2 + #define TX_MAX_PREF_LBN 0 + #define TX_MAX_PREF_WIDTH 2 +#define TX_VLAN_REG_OFST 0xAE0 /* Transmit VLAN tag register */ + #define TX_VLAN_EN_LBN 127 + #define TX_VLAN_EN_WIDTH 1 + #define TX_VLAN7_PORT1_EN_LBN 125 + #define TX_VLAN7_PORT1_EN_WIDTH 1 + #define TX_VLAN7_PORT0_EN_LBN 124 + #define TX_VLAN7_PORT0_EN_WIDTH 1 + #define TX_VLAN7_LBN 112 + #define TX_VLAN7_WIDTH 12 + #define TX_VLAN6_PORT1_EN_LBN 109 + #define TX_VLAN6_PORT1_EN_WIDTH 1 + #define TX_VLAN6_PORT0_EN_LBN 108 + #define TX_VLAN6_PORT0_EN_WIDTH 1 + #define TX_VLAN6_LBN 96 + #define TX_VLAN6_WIDTH 12 + #define TX_VLAN5_PORT1_EN_LBN 93 + #define TX_VLAN5_PORT1_EN_WIDTH 1 + #define TX_VLAN5_PORT0_EN_LBN 92 + #define TX_VLAN5_PORT0_EN_WIDTH 1 + #define TX_VLAN5_LBN 80 + #define TX_VLAN5_WIDTH 12 + #define TX_VLAN4_PORT1_EN_LBN 77 + #define TX_VLAN4_PORT1_EN_WIDTH 1 + #define TX_VLAN4_PORT0_EN_LBN 76 + #define TX_VLAN4_PORT0_EN_WIDTH 1 + #define TX_VLAN4_LBN 64 + #define TX_VLAN4_WIDTH 12 + #define TX_VLAN3_PORT1_EN_LBN 61 + #define TX_VLAN3_PORT1_EN_WIDTH 1 + #define TX_VLAN3_PORT0_EN_LBN 60 + #define TX_VLAN3_PORT0_EN_WIDTH 1 + #define TX_VLAN3_LBN 48 + #define TX_VLAN3_WIDTH 12 + #define TX_VLAN2_PORT1_EN_LBN 45 + #define TX_VLAN2_PORT1_EN_WIDTH 1 + #define TX_VLAN2_PORT0_EN_LBN 44 + #define TX_VLAN2_PORT0_EN_WIDTH 1 + #define TX_VLAN2_LBN 32 + #define TX_VLAN2_WIDTH 12 + #define TX_VLAN1_PORT1_EN_LBN 29 + #define TX_VLAN1_PORT1_EN_WIDTH 1 + #define TX_VLAN1_PORT0_EN_LBN 28 + #define TX_VLAN1_PORT0_EN_WIDTH 1 + #define TX_VLAN1_LBN 16 + #define TX_VLAN1_WIDTH 12 + #define TX_VLAN0_PORT1_EN_LBN 13 + #define TX_VLAN0_PORT1_EN_WIDTH 1 + #define TX_VLAN0_PORT0_EN_LBN 12 + #define TX_VLAN0_PORT0_EN_WIDTH 1 + #define TX_VLAN0_LBN 0 + #define TX_VLAN0_WIDTH 12 +#define TX_FIL_CTL_REG_OFST 0xAF0 /* Transmit filter control register */ + #define TX_MADR1_FIL_EN_LBN 65 + #define TX_MADR1_FIL_EN_WIDTH 1 + #define TX_MADR0_FIL_EN_LBN 64 + #define TX_MADR0_FIL_EN_WIDTH 1 + #define TX_IPFIL31_PORT1_EN_LBN 63 + #define TX_IPFIL31_PORT1_EN_WIDTH 1 + #define TX_IPFIL31_PORT0_EN_LBN 62 + #define TX_IPFIL31_PORT0_EN_WIDTH 1 + #define TX_IPFIL30_PORT1_EN_LBN 61 + #define TX_IPFIL30_PORT1_EN_WIDTH 1 + #define TX_IPFIL30_PORT0_EN_LBN 60 + #define TX_IPFIL30_PORT0_EN_WIDTH 1 + #define TX_IPFIL29_PORT1_EN_LBN 59 + #define TX_IPFIL29_PORT1_EN_WIDTH 1 + #define TX_IPFIL29_PORT0_EN_LBN 58 + #define TX_IPFIL29_PORT0_EN_WIDTH 1 + #define TX_IPFIL28_PORT1_EN_LBN 57 + #define TX_IPFIL28_PORT1_EN_WIDTH 1 + #define TX_IPFIL28_PORT0_EN_LBN 56 + #define TX_IPFIL28_PORT0_EN_WIDTH 1 + #define TX_IPFIL27_PORT1_EN_LBN 55 + #define TX_IPFIL27_PORT1_EN_WIDTH 1 + #define TX_IPFIL27_PORT0_EN_LBN 54 + #define TX_IPFIL27_PORT0_EN_WIDTH 1 + #define 
TX_IPFIL26_PORT1_EN_LBN 53 + #define TX_IPFIL26_PORT1_EN_WIDTH 1 + #define TX_IPFIL26_PORT0_EN_LBN 52 + #define TX_IPFIL26_PORT0_EN_WIDTH 1 + #define TX_IPFIL25_PORT1_EN_LBN 51 + #define TX_IPFIL25_PORT1_EN_WIDTH 1 + #define TX_IPFIL25_PORT0_EN_LBN 50 + #define TX_IPFIL25_PORT0_EN_WIDTH 1 + #define TX_IPFIL24_PORT1_EN_LBN 49 + #define TX_IPFIL24_PORT1_EN_WIDTH 1 + #define TX_IPFIL24_PORT0_EN_LBN 48 + #define TX_IPFIL24_PORT0_EN_WIDTH 1 + #define TX_IPFIL23_PORT1_EN_LBN 47 + #define TX_IPFIL23_PORT1_EN_WIDTH 1 + #define TX_IPFIL23_PORT0_EN_LBN 46 + #define TX_IPFIL23_PORT0_EN_WIDTH 1 + #define TX_IPFIL22_PORT1_EN_LBN 45 + #define TX_IPFIL22_PORT1_EN_WIDTH 1 + #define TX_IPFIL22_PORT0_EN_LBN 44 + #define TX_IPFIL22_PORT0_EN_WIDTH 1 + #define TX_IPFIL21_PORT1_EN_LBN 43 + #define TX_IPFIL21_PORT1_EN_WIDTH 1 + #define TX_IPFIL21_PORT0_EN_LBN 42 + #define TX_IPFIL21_PORT0_EN_WIDTH 1 + #define TX_IPFIL20_PORT1_EN_LBN 41 + #define TX_IPFIL20_PORT1_EN_WIDTH 1 + #define TX_IPFIL20_PORT0_EN_LBN 40 + #define TX_IPFIL20_PORT0_EN_WIDTH 1 + #define TX_IPFIL19_PORT1_EN_LBN 39 + #define TX_IPFIL19_PORT1_EN_WIDTH 1 + #define TX_IPFIL19_PORT0_EN_LBN 38 + #define TX_IPFIL19_PORT0_EN_WIDTH 1 + #define TX_IPFIL18_PORT1_EN_LBN 37 + #define TX_IPFIL18_PORT1_EN_WIDTH 1 + #define TX_IPFIL18_PORT0_EN_LBN 36 + #define TX_IPFIL18_PORT0_EN_WIDTH 1 + #define TX_IPFIL17_PORT1_EN_LBN 35 + #define TX_IPFIL17_PORT1_EN_WIDTH 1 + #define TX_IPFIL17_PORT0_EN_LBN 34 + #define TX_IPFIL17_PORT0_EN_WIDTH 1 + #define TX_IPFIL16_PORT1_EN_LBN 33 + #define TX_IPFIL16_PORT1_EN_WIDTH 1 + #define TX_IPFIL16_PORT0_EN_LBN 32 + #define TX_IPFIL16_PORT0_EN_WIDTH 1 + #define TX_IPFIL15_PORT1_EN_LBN 31 + #define TX_IPFIL15_PORT1_EN_WIDTH 1 + #define TX_IPFIL15_PORT0_EN_LBN 30 + #define TX_IPFIL15_PORT0_EN_WIDTH 1 + #define TX_IPFIL14_PORT1_EN_LBN 29 + #define TX_IPFIL14_PORT1_EN_WIDTH 1 + #define TX_IPFIL14_PORT0_EN_LBN 28 + #define TX_IPFIL14_PORT0_EN_WIDTH 1 + #define TX_IPFIL13_PORT1_EN_LBN 27 + #define TX_IPFIL13_PORT1_EN_WIDTH 1 + #define TX_IPFIL13_PORT0_EN_LBN 26 + #define TX_IPFIL13_PORT0_EN_WIDTH 1 + #define TX_IPFIL12_PORT1_EN_LBN 25 + #define TX_IPFIL12_PORT1_EN_WIDTH 1 + #define TX_IPFIL12_PORT0_EN_LBN 24 + #define TX_IPFIL12_PORT0_EN_WIDTH 1 + #define TX_IPFIL11_PORT1_EN_LBN 23 + #define TX_IPFIL11_PORT1_EN_WIDTH 1 + #define TX_IPFIL11_PORT0_EN_LBN 22 + #define TX_IPFIL11_PORT0_EN_WIDTH 1 + #define TX_IPFIL10_PORT1_EN_LBN 21 + #define TX_IPFIL10_PORT1_EN_WIDTH 1 + #define TX_IPFIL10_PORT0_EN_LBN 20 + #define TX_IPFIL10_PORT0_EN_WIDTH 1 + #define TX_IPFIL9_PORT1_EN_LBN 19 + #define TX_IPFIL9_PORT1_EN_WIDTH 1 + #define TX_IPFIL9_PORT0_EN_LBN 18 + #define TX_IPFIL9_PORT0_EN_WIDTH 1 + #define TX_IPFIL8_PORT1_EN_LBN 17 + #define TX_IPFIL8_PORT1_EN_WIDTH 1 + #define TX_IPFIL8_PORT0_EN_LBN 16 + #define TX_IPFIL8_PORT0_EN_WIDTH 1 + #define TX_IPFIL7_PORT1_EN_LBN 15 + #define TX_IPFIL7_PORT1_EN_WIDTH 1 + #define TX_IPFIL7_PORT0_EN_LBN 14 + #define TX_IPFIL7_PORT0_EN_WIDTH 1 + #define TX_IPFIL6_PORT1_EN_LBN 13 + #define TX_IPFIL6_PORT1_EN_WIDTH 1 + #define TX_IPFIL6_PORT0_EN_LBN 12 + #define TX_IPFIL6_PORT0_EN_WIDTH 1 + #define TX_IPFIL5_PORT1_EN_LBN 11 + #define TX_IPFIL5_PORT1_EN_WIDTH 1 + #define TX_IPFIL5_PORT0_EN_LBN 10 + #define TX_IPFIL5_PORT0_EN_WIDTH 1 + #define TX_IPFIL4_PORT1_EN_LBN 9 + #define TX_IPFIL4_PORT1_EN_WIDTH 1 + #define TX_IPFIL4_PORT0_EN_LBN 8 + #define TX_IPFIL4_PORT0_EN_WIDTH 1 + #define TX_IPFIL3_PORT1_EN_LBN 7 + #define TX_IPFIL3_PORT1_EN_WIDTH 1 + #define TX_IPFIL3_PORT0_EN_LBN 6 + #define 
TX_IPFIL3_PORT0_EN_WIDTH 1 + #define TX_IPFIL2_PORT1_EN_LBN 5 + #define TX_IPFIL2_PORT1_EN_WIDTH 1 + #define TX_IPFIL2_PORT0_EN_LBN 4 + #define TX_IPFIL2_PORT0_EN_WIDTH 1 + #define TX_IPFIL1_PORT1_EN_LBN 3 + #define TX_IPFIL1_PORT1_EN_WIDTH 1 + #define TX_IPFIL1_PORT0_EN_LBN 2 + #define TX_IPFIL1_PORT0_EN_WIDTH 1 + #define TX_IPFIL0_PORT1_EN_LBN 1 + #define TX_IPFIL0_PORT1_EN_WIDTH 1 + #define TX_IPFIL0_PORT0_EN_LBN 0 + #define TX_IPFIL0_PORT0_EN_WIDTH 1 +#define TX_IPFIL_TBL_OFST 0xB00 /* Transmit IP source address filter table */ + #define TX_IPFIL_MASK_LBN 32 + #define TX_IPFIL_MASK_WIDTH 32 + #define TX_IP_SRC_ADR_LBN 0 + #define TX_IP_SRC_ADR_WIDTH 32 +#define TX_PACE_REG_A1_OFST 0xF80000 /* Transmit pace control register */ +#define TX_PACE_REG_B0_OFST 0xA90 /* Transmit pace control register */ + #define TX_PACE_SB_NOTAF_LBN 19 + #define TX_PACE_SB_NOTAF_WIDTH 10 + #define TX_PACE_SB_AF_LBN 9 + #define TX_PACE_SB_AF_WIDTH 10 + #define TX_PACE_FB_BASE_LBN 5 + #define TX_PACE_FB_BASE_WIDTH 4 + #define TX_PACE_BIN_TH_LBN 0 + #define TX_PACE_BIN_TH_WIDTH 5 +#define TX_PACE_TBL_A1_OFST 0xF80040 /* Transmit pacing table */ +#define TX_PACE_TBL_FIRST_QUEUE_A1 4 +#define TX_PACE_TBL_B0_OFST 0xF80000 /* Transmit pacing table */ +#define TX_PACE_TBL_FIRST_QUEUE_B0 0 + #define TX_PACE_LBN 0 + #define TX_PACE_WIDTH 5 + +/*************---- EE/Flash Registers C Header ----*************/ +#define EE_SPI_HCMD_REG_KER_OFST 0x100 /* SPI host command register */ +#define EE_SPI_HCMD_REG_OFST 0x100 /* SPI host command register */ + #define EE_SPI_HCMD_CMD_EN_LBN 31 + #define EE_SPI_HCMD_CMD_EN_WIDTH 1 + #define EE_WR_TIMER_ACTIVE_LBN 28 + #define EE_WR_TIMER_ACTIVE_WIDTH 1 + #define EE_SPI_HCMD_SF_SEL_LBN 24 + #define EE_SPI_HCMD_SF_SEL_WIDTH 1 + #define EE_SPI_HCMD_DABCNT_LBN 16 + #define EE_SPI_HCMD_DABCNT_WIDTH 5 + #define EE_SPI_HCMD_READ_LBN 15 + #define EE_SPI_HCMD_READ_WIDTH 1 + #define EE_SPI_HCMD_DUBCNT_LBN 12 + #define EE_SPI_HCMD_DUBCNT_WIDTH 2 + #define EE_SPI_HCMD_ADBCNT_LBN 8 + #define EE_SPI_HCMD_ADBCNT_WIDTH 2 + #define EE_SPI_HCMD_ENC_LBN 0 + #define EE_SPI_HCMD_ENC_WIDTH 8 +#define EE_SPI_HADR_REG_KER_OFST 0X110 /* SPI host address register */ +#define EE_SPI_HADR_REG_OFST 0X110 /* SPI host address register */ + #define EE_SPI_HADR_DUBYTE_LBN 24 + #define EE_SPI_HADR_DUBYTE_WIDTH 8 + #define EE_SPI_HADR_ADR_LBN 0 + #define EE_SPI_HADR_ADR_WIDTH 24 +#define EE_SPI_HDATA_REG_KER_OFST 0x120 /* SPI host data register */ +#define EE_SPI_HDATA_REG_OFST 0x120 /* SPI host data register */ + #define EE_SPI_HDATA3_LBN 96 + #define EE_SPI_HDATA3_WIDTH 32 + #define EE_SPI_HDATA2_LBN 64 + #define EE_SPI_HDATA2_WIDTH 32 + #define EE_SPI_HDATA1_LBN 32 + #define EE_SPI_HDATA1_WIDTH 32 + #define EE_SPI_HDATA0_LBN 0 + #define EE_SPI_HDATA0_WIDTH 32 +#define EE_BASE_PAGE_REG_KER_OFST 0x130 /* Expansion ROM base mirror register */ +#define EE_BASE_PAGE_REG_OFST 0x130 /* Expansion ROM base mirror register */ + #define EE_EXP_ROM_WINDOW_BASE_LBN 16 + #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13 + #define EE_EXPROM_MASK_LBN 0 + #define EE_EXPROM_MASK_WIDTH 13 +#define EE_VPD_CFG0_REG_KER_OFST 0X140 /* SPI/VPD configuration register */ +#define EE_VPD_CFG0_REG_OFST 0X140 /* SPI/VPD configuration register */ + #define EE_SF_FASTRD_EN_LBN 127 + #define EE_SF_FASTRD_EN_WIDTH 1 + #define EE_SF_CLOCK_DIV_LBN 120 + #define EE_SF_CLOCK_DIV_WIDTH 7 + #define EE_VPD_WIP_POLL_LBN 119 + #define EE_VPD_WIP_POLL_WIDTH 1 + #define EE_VPDW_LENGTH_LBN 80 + #define EE_VPDW_LENGTH_WIDTH 15 + #define EE_VPDW_BASE_LBN 64 + #define 
EE_VPDW_BASE_WIDTH 15
+ #define EE_VPD_WR_CMD_EN_LBN 56
+ #define EE_VPD_WR_CMD_EN_WIDTH 8
+ #define EE_VPD_BASE_LBN 32
+ #define EE_VPD_BASE_WIDTH 24
+ #define EE_VPD_LENGTH_LBN 16
+ #define EE_VPD_LENGTH_WIDTH 13
+ #define EE_VPD_AD_SIZE_LBN 8
+ #define EE_VPD_AD_SIZE_WIDTH 5
+ #define EE_VPD_ACCESS_ON_LBN 5
+ #define EE_VPD_ACCESS_ON_WIDTH 1
+#define EE_VPD_SW_CNTL_REG_KER_OFST 0X150 /* VPD access SW control register */
+#define EE_VPD_SW_CNTL_REG_OFST 0X150 /* VPD access SW control register */
+ #define EE_VPD_CYCLE_PENDING_LBN 31
+ #define EE_VPD_CYCLE_PENDING_WIDTH 1
+ #define EE_VPD_CYC_WRITE_LBN 28
+ #define EE_VPD_CYC_WRITE_WIDTH 1
+ #define EE_VPD_CYC_ADR_LBN 0
+ #define EE_VPD_CYC_ADR_WIDTH 15
+#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 /* VPD access SW data register */
+#define EE_VPD_SW_DATA_REG_OFST 0x160 /* VPD access SW data register */
+ #define EE_VPD_CYC_DAT_LBN 0
+ #define EE_VPD_CYC_DAT_WIDTH 32
--- linux-ec2-2.6.31.orig/drivers/net/e1000/e1000.h
+++ linux-ec2-2.6.31/drivers/net/e1000/e1000.h
@@ -324,6 +324,8 @@
 	/* for ioport free */
 	int bars;
 	int need_ioport;
+
+	bool discarding;
 };
 enum e1000_state_t {
--- linux-ec2-2.6.31.orig/drivers/net/e1000/e1000_main.c
+++ linux-ec2-2.6.31/drivers/net/e1000/e1000_main.c
@@ -1924,18 +1924,6 @@
 		rctl &= ~E1000_RCTL_SZ_4096;
 		rctl |= E1000_RCTL_BSEX;
 		switch (adapter->rx_buffer_len) {
-		case E1000_RXBUFFER_256:
-			rctl |= E1000_RCTL_SZ_256;
-			rctl &= ~E1000_RCTL_BSEX;
-			break;
-		case E1000_RXBUFFER_512:
-			rctl |= E1000_RCTL_SZ_512;
-			rctl &= ~E1000_RCTL_BSEX;
-			break;
-		case E1000_RXBUFFER_1024:
-			rctl |= E1000_RCTL_SZ_1024;
-			rctl &= ~E1000_RCTL_BSEX;
-			break;
 		case E1000_RXBUFFER_2048:
 		default:
 			rctl |= E1000_RCTL_SZ_2048;
@@ -3492,13 +3480,7 @@
 	 * larger slab size
 	 * i.e. RXBUFFER_2048 --> size-4096 slab */
-	if (max_frame <= E1000_RXBUFFER_256)
-		adapter->rx_buffer_len = E1000_RXBUFFER_256;
-	else if (max_frame <= E1000_RXBUFFER_512)
-		adapter->rx_buffer_len = E1000_RXBUFFER_512;
-	else if (max_frame <= E1000_RXBUFFER_1024)
-		adapter->rx_buffer_len = E1000_RXBUFFER_1024;
-	else if (max_frame <= E1000_RXBUFFER_2048)
+	if (max_frame <= E1000_RXBUFFER_2048)
 		adapter->rx_buffer_len = E1000_RXBUFFER_2048;
 	else if (max_frame <= E1000_RXBUFFER_4096)
 		adapter->rx_buffer_len = E1000_RXBUFFER_4096;
@@ -4041,13 +4023,22 @@
 		length = le16_to_cpu(rx_desc->length);
 		/* !EOP means multiple descriptors were used to store a single
-		 * packet, also make sure the frame isn't just CRC only */
-		if (unlikely(!(status & E1000_RXD_STAT_EOP) || (length <= 4))) {
+		 * packet, if that's the case we need to toss it. In fact, we
+		 * need to toss every packet with the EOP bit clear and the next
+		 * frame that _does_ have the EOP bit set, as it is by
+		 * definition only a frame fragment
+		 */
+		if (unlikely(!(status & E1000_RXD_STAT_EOP)))
+			adapter->discarding = true;
+
+		if (adapter->discarding) {
 			/* All receives must fit into a single buffer */
 			E1000_DBG("%s: Receive packet consumed multiple"
 				  " buffers\n", netdev->name);
 			/* recycle */
 			buffer_info->skb = skb;
+			if (status & E1000_RXD_STAT_EOP)
+				adapter->discarding = false;
 			goto next_desc;
 		}
--- linux-ec2-2.6.31.orig/drivers/platform/x86/acerhdf.c
+++ linux-ec2-2.6.31/drivers/platform/x86/acerhdf.c
@@ -52,7 +52,7 @@
  */
 #undef START_IN_KERNEL_MODE
-#define DRV_VER "0.5.13"
+#define DRV_VER "0.5.16"
 /*
  * According to the Atom N270 datasheet,
@@ -61,7 +61,7 @@
 * measured by the on-die thermal monitor are within 0 <= Tj <= 90. So,
 * assume 89°C is critical temperature.
*/ -#define ACERHDF_TEMP_CRIT 89 +#define ACERHDF_TEMP_CRIT 89000 #define ACERHDF_FAN_OFF 0 #define ACERHDF_FAN_AUTO 1 @@ -69,7 +69,7 @@ * No matter what value the user puts into the fanon variable, turn on the fan * at 80 degree Celsius to prevent hardware damage */ -#define ACERHDF_MAX_FANON 80 +#define ACERHDF_MAX_FANON 80000 /* * Maximum interval between two temperature checks is 15 seconds, as the die @@ -85,11 +85,12 @@ #endif static unsigned int interval = 10; -static unsigned int fanon = 63; -static unsigned int fanoff = 58; +static unsigned int fanon = 63000; +static unsigned int fanoff = 58000; static unsigned int verbose; static unsigned int fanstate = ACERHDF_FAN_AUTO; static char force_bios[16]; +static char force_product[16]; static unsigned int prev_interval; struct thermal_zone_device *thz_dev; struct thermal_cooling_device *cl_dev; @@ -107,34 +108,62 @@ MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); module_param_string(force_bios, force_bios, 16, 0); MODULE_PARM_DESC(force_bios, "Force BIOS version and omit BIOS check"); +module_param_string(force_product, force_product, 16, 0); +MODULE_PARM_DESC(force_product, "Force BIOS product and omit BIOS check"); + +/* + * cmd_off: to switch the fan completely off / to check if the fan is off + * cmd_auto: to set the BIOS in control of the fan. The BIOS regulates then + * the fan speed depending on the temperature + */ +struct fancmd { + u8 cmd_off; + u8 cmd_auto; +}; /* BIOS settings */ struct bios_settings_t { const char *vendor; + const char *product; const char *version; unsigned char fanreg; unsigned char tempreg; - unsigned char fancmd[2]; /* fan off and auto commands */ + struct fancmd cmd; }; /* Register addresses and values for different BIOS versions */ static const struct bios_settings_t bios_tbl[] = { - {"Acer", "v0.3109", 0x55, 0x58, {0x1f, 0x00} }, - {"Acer", "v0.3114", 0x55, 0x58, {0x1f, 0x00} }, - {"Acer", "v0.3301", 0x55, 0x58, {0xaf, 0x00} }, - {"Acer", "v0.3304", 0x55, 0x58, {0xaf, 0x00} }, - {"Acer", "v0.3305", 0x55, 0x58, {0xaf, 0x00} }, - {"Acer", "v0.3308", 0x55, 0x58, {0x21, 0x00} }, - {"Acer", "v0.3309", 0x55, 0x58, {0x21, 0x00} }, - {"Acer", "v0.3310", 0x55, 0x58, {0x21, 0x00} }, - {"Gateway", "v0.3103", 0x55, 0x58, {0x21, 0x00} }, - {"Packard Bell", "v0.3105", 0x55, 0x58, {0x21, 0x00} }, - {"", "", 0, 0, {0, 0} } + /* AOA110 */ + {"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00} }, + {"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00} }, + {"Acer", "AOA110", "v0.3301", 0x55, 0x58, {0xaf, 0x00} }, + {"Acer", "AOA110", "v0.3304", 0x55, 0x58, {0xaf, 0x00} }, + {"Acer", "AOA110", "v0.3305", 0x55, 0x58, {0xaf, 0x00} }, + {"Acer", "AOA110", "v0.3307", 0x55, 0x58, {0xaf, 0x00} }, + {"Acer", "AOA110", "v0.3308", 0x55, 0x58, {0x21, 0x00} }, + {"Acer", "AOA110", "v0.3309", 0x55, 0x58, {0x21, 0x00} }, + {"Acer", "AOA110", "v0.3310", 0x55, 0x58, {0x21, 0x00} }, + /* AOA150 */ + {"Acer", "AOA150", "v0.3114", 0x55, 0x58, {0x20, 0x00} }, + {"Acer", "AOA150", "v0.3301", 0x55, 0x58, {0x20, 0x00} }, + {"Acer", "AOA150", "v0.3304", 0x55, 0x58, {0x20, 0x00} }, + {"Acer", "AOA150", "v0.3305", 0x55, 0x58, {0x20, 0x00} }, + {"Acer", "AOA150", "v0.3307", 0x55, 0x58, {0x20, 0x00} }, + {"Acer", "AOA150", "v0.3308", 0x55, 0x58, {0x20, 0x00} }, + {"Acer", "AOA150", "v0.3309", 0x55, 0x58, {0x20, 0x00} }, + {"Acer", "AOA150", "v0.3310", 0x55, 0x58, {0x20, 0x00} }, + /* special BIOS / other */ + {"Gateway", "AOA110", "v0.3103", 0x55, 0x58, {0x21, 0x00} }, + {"Gateway", "AOA150", "v0.3103", 0x55, 0x58, {0x20, 0x00} }, 
+ {"Packard Bell", "DOA150", "v0.3104", 0x55, 0x58, {0x21, 0x00} }, + {"Packard Bell", "AOA110", "v0.3105", 0x55, 0x58, {0x21, 0x00} }, + {"Packard Bell", "AOA150", "v0.3105", 0x55, 0x58, {0x20, 0x00} }, + /* pewpew-terminator */ + {"", "", "", 0, 0, {0, 0} } }; static const struct bios_settings_t *bios_cfg __read_mostly; - static int acerhdf_get_temp(int *temp) { u8 read_temp; @@ -142,7 +171,7 @@ if (ec_read(bios_cfg->tempreg, &read_temp)) return -EINVAL; - *temp = read_temp; + *temp = read_temp * 1000; return 0; } @@ -150,13 +179,14 @@ static int acerhdf_get_fanstate(int *state) { u8 fan; - bool tmp; if (ec_read(bios_cfg->fanreg, &fan)) return -EINVAL; - tmp = (fan == bios_cfg->fancmd[ACERHDF_FAN_OFF]); - *state = tmp ? ACERHDF_FAN_OFF : ACERHDF_FAN_AUTO; + if (fan != bios_cfg->cmd.cmd_off) + *state = ACERHDF_FAN_AUTO; + else + *state = ACERHDF_FAN_OFF; return 0; } @@ -175,7 +205,8 @@ state = ACERHDF_FAN_AUTO; } - cmd = bios_cfg->fancmd[state]; + cmd = (state == ACERHDF_FAN_OFF) ? bios_cfg->cmd.cmd_off + : bios_cfg->cmd.cmd_auto; fanstate = state; ec_write(bios_cfg->fanreg, cmd); @@ -437,7 +468,7 @@ return 0; } -struct platform_driver acerhdf_drv = { +static struct platform_driver acerhdf_driver = { .driver = { .name = "acerhdf", .owner = THIS_MODULE, @@ -454,32 +485,40 @@ { char const *vendor, *version, *product; int i; + unsigned long prod_len = 0; /* get BIOS data */ vendor = dmi_get_system_info(DMI_SYS_VENDOR); version = dmi_get_system_info(DMI_BIOS_VERSION); product = dmi_get_system_info(DMI_PRODUCT_NAME); + pr_info("Acer Aspire One Fan driver, v.%s\n", DRV_VER); - if (!force_bios[0]) { - if (strncmp(product, "AO", 2)) { - pr_err("no Aspire One hardware found\n"); - return -EINVAL; - } - } else { - pr_info("forcing BIOS version: %s\n", version); + if (force_bios[0]) { version = force_bios; + pr_info("forcing BIOS version: %s\n", version); kernelmode = 0; } + if (force_product[0]) { + product = force_product; + pr_info("forcing BIOS product: %s\n", product); + kernelmode = 0; + } + + prod_len = strlen(product); + if (verbose) pr_info("BIOS info: %s %s, product: %s\n", vendor, version, product); /* search BIOS version and vendor in BIOS settings table */ for (i = 0; bios_tbl[i].version[0]; i++) { - if (!strcmp(bios_tbl[i].vendor, vendor) && + if (strlen(bios_tbl[i].product) >= prod_len && + !strncmp(bios_tbl[i].product, product, + strlen(bios_tbl[i].product)) && + !strcmp(bios_tbl[i].vendor, vendor) && !strcmp(bios_tbl[i].version, version)) { bios_cfg = &bios_tbl[i]; break; @@ -487,8 +526,8 @@ } if (!bios_cfg) { - pr_err("unknown (unsupported) BIOS version %s/%s, " - "please report, aborting!\n", vendor, version); + pr_err("unknown (unsupported) BIOS version %s/%s/%s, " + "please report, aborting!\n", vendor, product, version); return -EINVAL; } @@ -509,7 +548,7 @@ { int err = 0; - err = platform_driver_register(&acerhdf_drv); + err = platform_driver_register(&acerhdf_driver); if (err) return err; @@ -525,7 +564,7 @@ return; platform_device_del(acerhdf_dev); - platform_driver_unregister(&acerhdf_drv); + platform_driver_unregister(&acerhdf_driver); } static int acerhdf_register_thermal(void) @@ -594,9 +633,10 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Peter Feuerer"); MODULE_DESCRIPTION("Aspire One temperature and fan driver"); -MODULE_ALIAS("dmi:*:*Acer*:*:"); -MODULE_ALIAS("dmi:*:*Gateway*:*:"); -MODULE_ALIAS("dmi:*:*Packard Bell*:*:"); +MODULE_ALIAS("dmi:*:*Acer*:pnAOA*:"); +MODULE_ALIAS("dmi:*:*Gateway*:pnAOA*:"); +MODULE_ALIAS("dmi:*:*Packard Bell*:pnAOA*:"); 
+MODULE_ALIAS("dmi:*:*Packard Bell*:pnDOA*:"); module_init(acerhdf_init); module_exit(acerhdf_exit); --- linux-ec2-2.6.31.orig/drivers/platform/x86/compal-laptop.c +++ linux-ec2-2.6.31/drivers/platform/x86/compal-laptop.c @@ -26,17 +26,8 @@ /* * comapl-laptop.c - Compal laptop support. * - * This driver exports a few files in /sys/devices/platform/compal-laptop/: - * - * wlan - wlan subsystem state: contains 0 or 1 (rw) - * - * bluetooth - Bluetooth subsystem state: contains 0 or 1 (rw) - * - * raw - raw value taken from embedded controller register (ro) - * - * In addition to these platform device attributes the driver - * registers itself in the Linux backlight control subsystem and is - * available to userspace under /sys/class/backlight/compal-laptop/. + * The driver registers itself with the rfkill subsystem and + * the Linux backlight control subsystem. * * This driver might work on other laptops produced by Compal. If you * want to try it you can pass force=1 as argument to the module which @@ -52,6 +43,7 @@ #include #include #include +#include #define COMPAL_DRIVER_VERSION "0.2.6" @@ -64,6 +56,10 @@ #define WLAN_MASK 0x01 #define BT_MASK 0x02 +static struct rfkill *wifi_rfkill; +static struct rfkill *bt_rfkill; +static struct platform_device *compal_device; + static int force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); @@ -89,65 +85,75 @@ return (int) result; } -static int set_wlan_state(int state) +static int compal_rfkill_set(void *data, bool blocked) { + unsigned long radio = (unsigned long) data; u8 result, value; ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); - if ((result & KILLSWITCH_MASK) == 0) - return -EINVAL; - else { - if (state) - value = (u8) (result | WLAN_MASK); - else - value = (u8) (result & ~WLAN_MASK); - ec_write(COMPAL_EC_COMMAND_WIRELESS, value); - } + if (!blocked) + value = (u8) (result | radio); + else + value = (u8) (result & ~radio); + ec_write(COMPAL_EC_COMMAND_WIRELESS, value); return 0; } -static int set_bluetooth_state(int state) +static void compal_rfkill_poll(struct rfkill *rfkill, void *data) { - u8 result, value; + u8 result; + bool hw_blocked; ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); - if ((result & KILLSWITCH_MASK) == 0) - return -EINVAL; - else { - if (state) - value = (u8) (result | BT_MASK); - else - value = (u8) (result & ~BT_MASK); - ec_write(COMPAL_EC_COMMAND_WIRELESS, value); - } - - return 0; + hw_blocked = !(result & KILLSWITCH_MASK); + rfkill_set_hw_state(rfkill, hw_blocked); } -static int get_wireless_state(int *wlan, int *bluetooth) +static const struct rfkill_ops compal_rfkill_ops = { + .poll = compal_rfkill_poll, + .set_block = compal_rfkill_set, +}; + +static int setup_rfkill(void) { - u8 result; + int ret; - ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); + wifi_rfkill = rfkill_alloc("compal-wifi", &compal_device->dev, + RFKILL_TYPE_WLAN, &compal_rfkill_ops, + (void *) WLAN_MASK); + if (!wifi_rfkill) + return -ENOMEM; - if (wlan) { - if ((result & KILLSWITCH_MASK) == 0) - *wlan = 0; - else - *wlan = result & WLAN_MASK; - } + ret = rfkill_register(wifi_rfkill); + if (ret) + goto err_wifi; - if (bluetooth) { - if ((result & KILLSWITCH_MASK) == 0) - *bluetooth = 0; - else - *bluetooth = (result & BT_MASK) >> 1; + bt_rfkill = rfkill_alloc("compal-bluetooth", &compal_device->dev, + RFKILL_TYPE_BLUETOOTH, &compal_rfkill_ops, + (void *) BT_MASK); + if (!bt_rfkill) { + ret = -ENOMEM; + goto err_allocate_bt; } + ret = rfkill_register(bt_rfkill); + if (ret) + goto err_register_bt; return 
0; + +err_register_bt: + rfkill_destroy(bt_rfkill); + +err_allocate_bt: + rfkill_unregister(wifi_rfkill); + +err_wifi: + rfkill_destroy(wifi_rfkill); + + return ret; } /* Backlight device stuff */ @@ -170,86 +176,6 @@ static struct backlight_device *compalbl_device; -/* Platform device */ - -static ssize_t show_wlan(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int ret, enabled; - - ret = get_wireless_state(&enabled, NULL); - if (ret < 0) - return ret; - - return sprintf(buf, "%i\n", enabled); -} - -static ssize_t show_raw(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u8 result; - - ec_read(COMPAL_EC_COMMAND_WIRELESS, &result); - - return sprintf(buf, "%i\n", result); -} - -static ssize_t show_bluetooth(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int ret, enabled; - - ret = get_wireless_state(NULL, &enabled); - if (ret < 0) - return ret; - - return sprintf(buf, "%i\n", enabled); -} - -static ssize_t store_wlan_state(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int state, ret; - - if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1)) - return -EINVAL; - - ret = set_wlan_state(state); - if (ret < 0) - return ret; - - return count; -} - -static ssize_t store_bluetooth_state(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int state, ret; - - if (sscanf(buf, "%i", &state) != 1 || (state < 0 || state > 1)) - return -EINVAL; - - ret = set_bluetooth_state(state); - if (ret < 0) - return ret; - - return count; -} - -static DEVICE_ATTR(bluetooth, 0644, show_bluetooth, store_bluetooth_state); -static DEVICE_ATTR(wlan, 0644, show_wlan, store_wlan_state); -static DEVICE_ATTR(raw, 0444, show_raw, NULL); - -static struct attribute *compal_attributes[] = { - &dev_attr_bluetooth.attr, - &dev_attr_wlan.attr, - &dev_attr_raw.attr, - NULL -}; - -static struct attribute_group compal_attribute_group = { - .attrs = compal_attributes -}; static struct platform_driver compal_driver = { .driver = { @@ -258,8 +184,6 @@ } }; -static struct platform_device *compal_device; - /* Initialization */ static int dmi_check_cb(const struct dmi_system_id *id) @@ -311,6 +235,47 @@ }, .callback = dmi_check_cb }, + { + .ident = "Dell Mini 9", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 910"), + }, + .callback = dmi_check_cb + }, + { + .ident = "Dell Mini 10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1010"), + }, + .callback = dmi_check_cb + }, + { + .ident = "Dell Mini 10v", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1011"), + }, + .callback = dmi_check_cb + }, + { + .ident = "Dell Inspiron 11z", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1110"), + }, + .callback = dmi_check_cb + }, + { + .ident = "Dell Mini 12", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1210"), + }, + .callback = dmi_check_cb + }, + { } }; @@ -349,23 +314,21 @@ ret = platform_device_add(compal_device); if (ret) - goto fail_platform_device1; + goto fail_platform_device; - ret = sysfs_create_group(&compal_device->dev.kobj, - &compal_attribute_group); + ret = setup_rfkill(); if (ret) - goto fail_platform_device2; + goto fail_rfkill; printk(KERN_INFO "compal-laptop: driver "COMPAL_DRIVER_VERSION " successfully loaded.\n"); return 0; 
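The setup_rfkill() conversion above follows the standard 2.6.31 rfkill pattern: allocate, register, and unwind in reverse on failure, with a .poll hook for the hardware kill switch and .set_block for software state. Stripped of the Compal EC specifics it looks like the sketch below; this is illustrative only, the demo_* names are placeholders and the EC accesses are stubbed out.

	#include <linux/rfkill.h>

	static int demo_set_block(void *data, bool blocked)
	{
		/* flip the radio-enable bit named by 'data' in the EC here */
		return 0;
	}

	static void demo_poll(struct rfkill *rfkill, void *data)
	{
		bool hw_blocked = false;	/* read the hardware kill switch here */

		rfkill_set_hw_state(rfkill, hw_blocked);
	}

	static const struct rfkill_ops demo_rfkill_ops = {
		.poll		= demo_poll,
		.set_block	= demo_set_block,
	};

	static struct rfkill *demo_rfkill;

	static int demo_rfkill_init(struct device *parent)
	{
		int ret;

		demo_rfkill = rfkill_alloc("demo-wifi", parent, RFKILL_TYPE_WLAN,
					   &demo_rfkill_ops, NULL);
		if (!demo_rfkill)
			return -ENOMEM;

		ret = rfkill_register(demo_rfkill);
		if (ret)
			rfkill_destroy(demo_rfkill);	/* undo the alloc on failure */
		return ret;
	}

	static void demo_rfkill_exit(void)
	{
		rfkill_unregister(demo_rfkill);
		rfkill_destroy(demo_rfkill);
	}

Providing .poll (rather than an interrupt source) is what lets the core periodically resample the physical kill switch, which is all the Compal EC offers.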
-fail_platform_device2: - +fail_rfkill: platform_device_del(compal_device); -fail_platform_device1: +fail_platform_device: platform_device_put(compal_device); @@ -383,10 +346,13 @@ static void __exit compal_cleanup(void) { - sysfs_remove_group(&compal_device->dev.kobj, &compal_attribute_group); platform_device_unregister(compal_device); platform_driver_unregister(&compal_driver); backlight_device_unregister(compalbl_device); + rfkill_unregister(wifi_rfkill); + rfkill_destroy(wifi_rfkill); + rfkill_unregister(bt_rfkill); + rfkill_destroy(bt_rfkill); printk(KERN_INFO "compal-laptop: driver unloaded.\n"); } @@ -404,3 +370,8 @@ MODULE_ALIAS("dmi:*:rnIFL91:rvrIFT00:*"); MODULE_ALIAS("dmi:*:rnJFL92:rvrIFT00:*"); MODULE_ALIAS("dmi:*:rnIFT00:rvrIFT00:*"); +MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron910:*"); +MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1010:*"); +MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1011:*"); +MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1110:*"); +MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1210:*"); --- linux-ec2-2.6.31.orig/drivers/platform/x86/toshiba_acpi.c +++ linux-ec2-2.6.31/drivers/platform/x86/toshiba_acpi.c @@ -28,13 +28,28 @@ * engineering the Windows drivers * Yasushi Nagato - changes for linux kernel 2.4 -> 2.5 * Rob Miller - TV out and hotkeys help + * Daniel Silverstone - Punting of hotkeys via acpi using a thread * + * PLEASE NOTE + * + * This is an experimental version of toshiba_acpi which includes emulation + * of the original toshiba driver's /proc/toshiba and /dev/toshiba, + * allowing Toshiba userspace utilities to work. The relevant code was + * based on toshiba.c (copyright 1996-2001 Jonathan A. Buzzard) and + * incorporated into this driver with help from Gintautas Miliauskas, + * Charles Schwieters, and Christoph Burger-Scheidlin. + * + * Caveats: + * * hotkey status in /proc/toshiba is not implemented + * * to make accesses to /dev/toshiba load this driver instead of + * the original driver, you will have to modify your module + * auto-loading configuration * * TODO * */ -#define TOSHIBA_ACPI_VERSION "0.19" +#define TOSHIBA_ACPI_VERSION "0.19-dev-acpikeys" #define PROC_INTERFACE_VERSION 1 #include @@ -42,9 +57,15 @@ #include #include #include +#include +#include +#include +#include #include #include #include +#include +#include #include @@ -356,6 +377,11 @@ static int force_fan; static int last_key_event; static int key_event_valid; +static int hotkeys_over_acpi = 1; +static int hotkeys_check_per_sec = 2; + +module_param(hotkeys_over_acpi, uint, 0400); +module_param(hotkeys_check_per_sec, uint, 0400); typedef struct _ProcItem { const char *name; @@ -583,27 +609,34 @@ u32 hci_result; u32 value; - if (!key_event_valid) { - hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result); - if (hci_result == HCI_SUCCESS) { - key_event_valid = 1; - last_key_event = value; - } else if (hci_result == HCI_EMPTY) { - /* better luck next time */ - } else if (hci_result == HCI_NOT_SUPPORTED) { - /* This is a workaround for an unresolved issue on - * some machines where system events sporadically - * become disabled. 
 */
-		hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
-		printk(MY_NOTICE "Re-enabled hotkeys\n");
-	} else {
-		printk(MY_ERR "Error reading hotkey status\n");
-		goto end;
+	if (!hotkeys_over_acpi) {
+		if (!key_event_valid) {
+			hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result);
+			if (hci_result == HCI_SUCCESS) {
+				key_event_valid = 1;
+				last_key_event = value;
+			} else if (hci_result == HCI_EMPTY) {
+				/* better luck next time */
+			} else if (hci_result == HCI_NOT_SUPPORTED) {
+				/* This is a workaround for an
+				 * unresolved issue on some machines
+				 * where system events sporadically
+				 * become disabled. */
+				hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
+				printk(MY_NOTICE "Re-enabled hotkeys\n");
+			} else {
+				printk(MY_ERR "Error reading hotkey status\n");
+				goto end;
+			}
		}
+	} else {
+		key_event_valid = 0;
+		last_key_event = 0;
	}
	p += sprintf(p, "hotkey_ready: %d\n", key_event_valid);
	p += sprintf(p, "hotkey: 0x%04x\n", last_key_event);
+	p += sprintf(p, "hotkeys_via_acpi: %d\n", hotkeys_over_acpi);
 end:
	return p;
@@ -630,6 +663,191 @@
	return p;
}
+/* /dev/toshiba and /proc/toshiba handlers {{{
+ *
+ * ISSUE: lots of magic numbers and mysterious code
+ */
+
+#define TOSH_MINOR_DEV 181
+#define OLD_PROC_TOSHIBA "toshiba"
+
+static int
+tosh_acpi_bridge(SMMRegisters* regs)
+{
+	acpi_status status;
+
+	/* assert(sizeof(SMMRegisters) == sizeof(u32)*HCI_WORDS); */
+	status = hci_raw((u32*)regs, (u32*)regs);
+	if (status == AE_OK && (regs->eax & 0xff00) == HCI_SUCCESS)
+		return 0;
+
+	return -EINVAL;
+}
+
+static int
+tosh_ioctl(struct inode* ip, struct file* fp, unsigned int cmd,
+	unsigned long arg)
+{
+	SMMRegisters regs;
+	unsigned short ax,bx;
+	int err;
+
+	if ((!arg) || (cmd != TOSH_SMM))
+		return -EINVAL;
+
+	if (copy_from_user(&regs, (SMMRegisters*)arg, sizeof(SMMRegisters)))
+		return -EFAULT;
+
+	ax = regs.eax & 0xff00;
+	bx = regs.ebx & 0xffff;
+
+	/* block HCI calls to read/write memory & PCI devices */
+	if (((ax==HCI_SET) || (ax==HCI_GET)) && (bx>0x0069))
+		return -EINVAL;
+
+	err = tosh_acpi_bridge(&regs);
+
+	if (copy_to_user((SMMRegisters*)arg, &regs, sizeof(SMMRegisters)))
+		return -EFAULT;
+
+	return err;
+}
+
+static int
+tosh_get_machine_id(void __iomem *bios)
+{
+	int id;
+	unsigned short bx,cx;
+	unsigned long address;
+
+	id = (0x100*(int) readb(bios+0xfffe))+((int) readb(bios+0xfffa));
+
+	/* do we have a SCTTable machine identification number on our hands */
+	if (id==0xfc2f) {
+		bx = 0xe6f5; /* cheat */
+		/* now twiddle with our pointer a bit */
+		address = 0x00000000 + bx;
+		cx = readw(bios + address);
+		address = 0x00000009 + bx + cx;
+		cx = readw(bios + address);
+		address = 0x0000000a + cx;
+		cx = readw(bios + address);
+		/* now construct our machine identification number */
+		id = ((cx & 0xff)<<8)+((cx & 0xff00)>>8);
+	}
+
+	return id;
+}
+
+static int tosh_id;
+static int tosh_bios;
+static int tosh_date;
+static int tosh_sci;
+
+static struct file_operations tosh_fops = {
+	.owner = THIS_MODULE,
+	.ioctl = tosh_ioctl
+};
+
+static struct miscdevice tosh_device = {
+	TOSH_MINOR_DEV,
+	"toshiba",
+	&tosh_fops
+};
+
+static void
+setup_tosh_info(void __iomem *bios)
+{
+	int major, minor;
+	int day, month, year;
+
+	tosh_id = tosh_get_machine_id(bios);
+
+	/* get the BIOS version */
+	major = readb(bios + 0xe009)-'0';
+	minor = ((readb(bios + 0xe00b)-'0')*10)+(readb(bios + 0xe00c)-'0');
+	tosh_bios = (major*0x100)+minor;
+
+	/* get the BIOS date */
+	day = ((readb(bios + 0xfff5)-'0')*10)+(readb(bios + 0xfff6)-'0');
+	month = ((readb(bios + 0xfff8)-'0')*10)+(readb(bios + 
0xfff9)-'0'); + year = ((readb(bios + 0xfffb)-'0')*10)+(readb(bios + 0xfffc)-'0'); + tosh_date = (((year-90) & 0x1f)<<10) | ((month & 0xf)<<6) + | ((day & 0x1f)<<1); +} + +/* /proc/toshiba read handler */ +static int +tosh_proc_show(struct seq_file *m, void *v) +{ + /* TODO: tosh_fn_status() */ + int key = 0; + + /* Format: + * 0) Linux driver version (this will change if format changes) + * 1) Machine ID + * 2) SCI version + * 3) BIOS version (major, minor) + * 4) BIOS date (in SCI date format) + * 5) Fn Key status + */ + + seq_printf(m, "1.1 0x%04x %d.%d %d.%d 0x%04x 0x%02x\n", + tosh_id, + (tosh_sci & 0xff00)>>8, + tosh_sci & 0xff, + (tosh_bios & 0xff00)>>8, + tosh_bios & 0xff, + tosh_date, + key); + + return 0; +} + +static int tosh_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, tosh_proc_show, NULL); +} + +static const struct file_operations tosh_proc_fops = { + .owner = THIS_MODULE, + .open = tosh_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init +old_driver_emulation_init(void) +{ + int status; + void __iomem *bios = ioremap(0xf0000, 0x10000); + if (!bios) + return -ENOMEM; + + if ((status = misc_register(&tosh_device))) { + printk(MY_ERR "failed to register misc device %d (\"%s\")\n", + tosh_device.minor, tosh_device.name); + return status; + } + + setup_tosh_info(bios); + proc_create(OLD_PROC_TOSHIBA, 0, NULL, &tosh_proc_fops); + + iounmap(bios); + + return 0; +} + +static void __exit +old_driver_emulation_exit(void) +{ + remove_proc_entry(OLD_PROC_TOSHIBA, NULL); + misc_deregister(&tosh_device); +} + +/* }}} end of /dev/toshiba and /proc/toshiba handlers */ + /* proc and module init */ @@ -676,6 +894,133 @@ .update_status = set_lcd_status, }; +static struct semaphore thread_sem; +static int thread_should_die; + +static struct acpi_device *threaded_device = 0; + +static void thread_deliver_button_event(u32 value) +{ + if (!threaded_device) return; + if( value == 0x0100 ) { + /* Ignore FN on its own */ + } else if( value & 0x80 ) { + acpi_bus_generate_proc_event( threaded_device, 1, value & ~0x80 ); + } else { + acpi_bus_generate_proc_event( threaded_device, 0, value ); + } +} + +static int toshiba_acpi_thread(void *data) +{ + int dropped = 0; + u32 hci_result, value; + + daemonize("ktoshkeyd"); + set_user_nice(current, 4); + thread_should_die = 0; + + up(&thread_sem); + + do { + /* In case we get stuck; we can rmmod the module here */ + if (thread_should_die) + break; + + hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result); + if (hci_result == HCI_SUCCESS) { + dropped++; + } else if (hci_result == HCI_EMPTY) { + /* better luck next time */ + } else if (hci_result == HCI_NOT_SUPPORTED) { + /* This is a workaround for an unresolved issue on + * some machines where system events sporadically + * become disabled. 
*/ + hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result); + printk(MY_NOTICE "Re-enabled hotkeys\n"); + } + } while (hci_result != HCI_EMPTY); + + printk(MY_INFO "Dropped %d keys from the queue on startup\n", dropped); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / hotkeys_check_per_sec); + + if (thread_should_die) + break; + + if (try_to_freeze()) + continue; + + do { + hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result); + if (hci_result == HCI_SUCCESS) { + thread_deliver_button_event(value); + } else if (hci_result == HCI_EMPTY) { + /* better luck next time */ + } else if (hci_result == HCI_NOT_SUPPORTED) { + /* This is a workaround for an + * unresolved issue on some machines + * where system events sporadically + * become disabled. */ + hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result); + printk(MY_NOTICE "Re-enabled hotkeys\n"); + } + } while (hci_result == HCI_SUCCESS); + } + set_user_nice(current, -20); /* Become nasty so we are cleaned up + * before the module exits making us oops */ + up(&thread_sem); + return 0; +} + +static int acpi_toshkeys_add (struct acpi_device *device) +{ + threaded_device = device; + strcpy(acpi_device_name(device), "Toshiba laptop hotkeys"); + strcpy(acpi_device_class(device), "hkey"); + return 0; +} + +static int acpi_toshkeys_remove (struct acpi_device *device, int type) +{ + if (threaded_device == device) + threaded_device = 0; + return 0; +} + +static const struct acpi_device_id acpi_toshkeys_ids[] = { + { "TOS6200", 0 }, + { "TOS6207", 0 }, + { "TOS6208", 0 }, + {"", 0} +}; + +static struct acpi_driver acpi_threaded_toshkeys = { + .name = "Toshiba laptop hotkeys driver", + .class = "hkey", + .ids = acpi_toshkeys_ids, + .ops = { + .add = acpi_toshkeys_add, + .remove = acpi_toshkeys_remove, + }, +}; + +static int __init init_threaded_acpi(void) +{ + acpi_status result = AE_OK; + result = acpi_bus_register_driver(&acpi_threaded_toshkeys); + if( result < 0 ) + printk(MY_ERR "Registration of toshkeys acpi device failed\n"); + return result; +} + +static void kill_threaded_acpi(void) +{ + acpi_bus_unregister_driver(&acpi_threaded_toshkeys); +} + static void toshiba_acpi_exit(void) { if (toshiba_acpi.bt_rfk) { @@ -686,11 +1031,19 @@ if (toshiba_backlight_device) backlight_device_unregister(toshiba_backlight_device); + if (hotkeys_over_acpi) { + thread_should_die = 1; + down(&thread_sem); + kill_threaded_acpi(); + } + remove_device(); if (toshiba_proc_dir) remove_proc_entry(PROC_TOSHIBA, acpi_root_dir); + old_driver_emulation_exit(); + platform_device_unregister(toshiba_acpi.p_dev); return; @@ -730,6 +1083,9 @@ return ret; } + if ((ret = old_driver_emulation_init())) + return ret; + force_fan = 0; key_event_valid = 0; @@ -762,6 +1118,26 @@ } toshiba_backlight_device->props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1; + if (hotkeys_over_acpi && ACPI_SUCCESS(status)) { + printk(MY_INFO "Toshiba hotkeys are sent as ACPI events\n"); + if (hotkeys_check_per_sec < 1) + hotkeys_check_per_sec = 1; + if (hotkeys_check_per_sec > 10) + hotkeys_check_per_sec = 10; + printk(MY_INFO "ktoshkeyd will check %d time%s per second\n", + hotkeys_check_per_sec, hotkeys_check_per_sec==1?"":"s"); + if (init_threaded_acpi() >= 0) { + init_MUTEX_LOCKED(&thread_sem); + kernel_thread(toshiba_acpi_thread, NULL, CLONE_KERNEL); + down(&thread_sem); + } else { + remove_device(); + remove_proc_entry(PROC_TOSHIBA, acpi_root_dir); + status = AE_ERROR; + printk(MY_INFO "ktoshkeyd initialisation failed. 
Refusing to load module\n"); + } + } + /* Register rfkill switch for Bluetooth */ if (hci_get_bt_present(&bt_present) == HCI_SUCCESS && bt_present) { toshiba_acpi.bt_rfk = rfkill_alloc(toshiba_acpi.bt_name, --- linux-ec2-2.6.31.orig/drivers/platform/x86/dell-laptop.c +++ linux-ec2-2.6.31/drivers/platform/x86/dell-laptop.c @@ -22,9 +22,14 @@ #include #include #include +#include #include "../../firmware/dcdbas.h" #define BRIGHTNESS_TOKEN 0x7d +#define WLAN_SWITCH_MASK 0 +#define BT_SWITCH_MASK 1 +#define WWAN_SWITCH_MASK 2 +#define HW_SWITCH_MASK 16 /* This structure will be modified by the firmware when we enter * system management mode, hence the volatiles */ @@ -63,6 +68,13 @@ static struct rfkill *bluetooth_rfkill; static struct rfkill *wwan_rfkill; +/* + * RFkill status is maintained in software because the BIOS has an annoying + * habit of emitting a KEY_WLAN key press event before the BIOS state is updated, making + * dell_send_request() racy. + */ +static int hw_switch_status; + static const struct dmi_system_id __initdata dell_device_table[] = { { .ident = "Dell laptop", @@ -74,6 +86,54 @@ { } }; +static struct dmi_system_id __devinitdata dell_blacklist[] = { + /* BIOS always returns HW switch disabled */ + { + .ident = "Dell Vostro 1720", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1720"), + }, + }, + /* Supported by compal-laptop */ + { + .ident = "Dell Mini 9", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 910"), + }, + }, + { + .ident = "Dell Mini 10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1010"), + }, + }, + { + .ident = "Dell Mini 10v", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1011"), + }, + }, + { + .ident = "Dell Inspiron 11z", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1110"), + }, + }, + { + .ident = "Dell Mini 12", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1210"), + }, + }, + {} +}; + static void parse_da_table(const struct dmi_header *dm) { /* Final token is a terminator, so we don't want to copy it */ @@ -180,10 +240,11 @@ int disable = blocked ? 
1 : 0;
	unsigned long radio = (unsigned long)data;
-	memset(&buffer, 0, sizeof(struct calling_interface_buffer));
-	buffer.input[0] = (1 | (radio<<8) | (disable << 16));
-	dell_send_request(&buffer, 17, 11);
-
+	if (!(hw_switch_status & BIT(radio-1)) || !(hw_switch_status & BIT(HW_SWITCH_MASK))) {
+		memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+		buffer.input[0] = (1 | (radio<<8) | (disable << 16));
+		dell_send_request(&buffer, 17, 11);
+	}
	return 0;
}
@@ -191,14 +252,32 @@
{
	struct calling_interface_buffer buffer;
	int status;
-	int bit = (unsigned long)data + 16;
+	int bit = (unsigned long)data - 1;
	memset(&buffer, 0, sizeof(struct calling_interface_buffer));
	dell_send_request(&buffer, 17, 11);
	status = buffer.output[1];
-	if (status & BIT(bit))
-		rfkill_set_hw_state(rfkill, !!(status & BIT(16)));
+	hw_switch_status |= (status & BIT(HW_SWITCH_MASK)) ^ BIT(HW_SWITCH_MASK);
+
+	/* HW switch control not supported;
+	   explicitly set it for all 3 as they'll change in unison then */
+	if (!(status & BIT(0)))
+		hw_switch_status |= BIT(WLAN_SWITCH_MASK) | BIT(BT_SWITCH_MASK) | BIT(WWAN_SWITCH_MASK);
+	else {
+		/* rerun the query to see what is really supported */
+		memset(&buffer, 0, sizeof(struct calling_interface_buffer));
+		buffer.input[0] = 2;
+		dell_send_request(&buffer, 17, 11);
+		status = buffer.output[1];
+
+		hw_switch_status |= status & BIT(bit);
+	}
+
+	if (hw_switch_status & BIT(bit))
+		rfkill_set_hw_state(rfkill, hw_switch_status & BIT(HW_SWITCH_MASK));
+	else
+		rfkill_set_hw_state(rfkill, 0);
}

static const struct rfkill_ops dell_rfkill_ops = {
@@ -206,11 +285,35 @@
	.query = dell_rfkill_query,
};

+/*
+ * Called for each KEY_WLAN key press event. Note that a physical
+ * rf-kill switch change also causes the BIOS to emit a KEY_WLAN.
+ */
+static void dell_rfkill_update(void)
+{
+	hw_switch_status ^= BIT(HW_SWITCH_MASK);
+	if (wifi_rfkill && (hw_switch_status & BIT(WLAN_SWITCH_MASK))) {
+		rfkill_set_hw_state(wifi_rfkill, hw_switch_status & BIT(HW_SWITCH_MASK));
+		dell_rfkill_set((void*)1, rfkill_blocked(wifi_rfkill));
+	}
+
+	if (bluetooth_rfkill && (hw_switch_status & BIT(BT_SWITCH_MASK))) {
+		rfkill_set_hw_state(bluetooth_rfkill, hw_switch_status & BIT(HW_SWITCH_MASK));
+		dell_rfkill_set((void*)2, rfkill_blocked(bluetooth_rfkill));
+	}
+
+	if (wwan_rfkill && (hw_switch_status & BIT(WWAN_SWITCH_MASK))) {
+		rfkill_set_hw_state(wwan_rfkill, hw_switch_status & BIT(HW_SWITCH_MASK));
+		dell_rfkill_set((void*)3, rfkill_blocked(wwan_rfkill));
+	}
+}
+
static int dell_setup_rfkill(void)
{
	struct calling_interface_buffer buffer;
	int status;
	int ret;
+	hw_switch_status = 0;
	memset(&buffer, 0, sizeof(struct calling_interface_buffer));
	dell_send_request(&buffer, 17, 11);
@@ -310,6 +413,90 @@
	.update_status = dell_send_intensity,
};

+static const struct input_device_id dell_input_ids[] = {
+	{
+		.bustype = 0x11,
+		.vendor = 0x01,
+		.product = 0x01,
+		.version = 0xab41,
+		.flags = INPUT_DEVICE_ID_MATCH_BUS |
+			 INPUT_DEVICE_ID_MATCH_VENDOR |
+			 INPUT_DEVICE_ID_MATCH_PRODUCT |
+			 INPUT_DEVICE_ID_MATCH_VERSION
+	},
+	{ },
+};
+
+static bool dell_input_filter(struct input_handle *handle, unsigned int type,
+			      unsigned int code, int value)
+{
+	if (type == EV_KEY && code == KEY_WLAN && value == 1) {
+		dell_rfkill_update();
+		return 1;
+	}
+
+	return 0;
+}
+
+static void dell_input_event(struct input_handle *handle, unsigned int type,
+			     unsigned int code, int value)
+{
+}
+
+static int dell_input_connect(struct input_handler *handler,
+			      struct input_dev *dev,
+			      const struct input_device_id *id)
+{
+	
struct input_handle *handle; + int error; + + handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "dell-laptop"; + + error = input_register_handle(handle); + if (error) + goto err_free_handle; + + error = input_open_device(handle); + if (error) + goto err_unregister_handle; + + error = input_filter_device(handle); + if (error) + goto err_close_handle; + + return 0; + +err_close_handle: + input_close_device(handle); +err_unregister_handle: + input_unregister_handle(handle); +err_free_handle: + kfree(handle); + return error; +} + +static void dell_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static struct input_handler dell_input_handler = { + .name = "dell-laptop", + .filter = dell_input_filter, + .event = dell_input_event, + .connect = dell_input_connect, + .disconnect = dell_input_disconnect, + .id_table = dell_input_ids, +}; + static int __init dell_init(void) { struct calling_interface_buffer buffer; @@ -319,6 +506,12 @@ if (!dmi_check_system(dell_device_table)) return -ENODEV; + if (dmi_check_system(dell_blacklist)) { + printk(KERN_INFO "dell-laptop: Blacklisted hardware detected - " + "not loading\n"); + return -ENODEV; + } + dmi_walk(find_tokens, NULL); if (!da_tokens) { @@ -333,6 +526,10 @@ goto out; } + if (input_register_handler(&dell_input_handler)) + printk(KERN_INFO + "dell-laptop: Could not register input filter\n"); + #ifdef CONFIG_ACPI /* In the event of an ACPI backlight being available, don't * register the platform controller. @@ -388,6 +585,7 @@ rfkill_unregister(bluetooth_rfkill); if (wwan_rfkill) rfkill_unregister(wwan_rfkill); + input_unregister_handler(&dell_input_handler); } module_init(dell_init); --- linux-ec2-2.6.31.orig/drivers/platform/x86/sony-laptop.c +++ linux-ec2-2.6.31/drivers/platform/x86/sony-laptop.c @@ -1081,6 +1081,8 @@ struct rfkill *rfk; enum rfkill_type type; const char *name; + int result; + bool hwblock; switch (nc_type) { case SONY_WIFI: @@ -1108,6 +1110,10 @@ if (!rfk) return -ENOMEM; + sony_call_snc_handle(0x124, 0x200, &result); + hwblock = !(result & 0x1); + rfkill_set_hw_state(rfk, hwblock); + err = rfkill_register(rfk); if (err) { rfkill_destroy(rfk); @@ -1399,10 +1405,13 @@ u16 evport_offset; u8 has_camera; u8 has_bluetooth; - u8 has_wwan; struct sonypi_eventtypes *event_types; }; +struct sony_pic_quirk_entry { + u8 set_wwan_power; +}; + struct sony_pic_dev { struct device_ctrl *control; struct acpi_device *acpi_dev; @@ -1411,6 +1420,7 @@ struct list_head interrupts; struct list_head ioports; struct mutex lock; + struct sony_pic_quirk_entry *quirks; u8 camera_power; u8 bluetooth_power; u8 wwan_power; @@ -2844,6 +2854,12 @@ if (result) goto err_remove_pf; + if (spic_dev.quirks && spic_dev.quirks->set_wwan_power) { + /* + * Power isn't enabled by default. 
+ */ + __sony_pic_set_wwanpower(1); + } return 0; err_remove_pf: @@ -2914,6 +2930,16 @@ }, }; +static struct sony_pic_quirk_entry sony_pic_vaio_vgn = { + .set_wwan_power = 1, +}; + +static int dmi_matched(const struct dmi_system_id *dmi) +{ + spic_dev.quirks = dmi->driver_data; + return 0; +} + static struct dmi_system_id __initdata sonypi_dmi_table[] = { { .ident = "Sony Vaio", @@ -2928,6 +2954,8 @@ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), DMI_MATCH(DMI_PRODUCT_NAME, "VGN-"), }, + .callback = dmi_matched, + .driver_data = &sony_pic_vaio_vgn, }, { } }; --- linux-ec2-2.6.31.orig/drivers/platform/x86/asus-laptop.c +++ linux-ec2-2.6.31/drivers/platform/x86/asus-laptop.c @@ -1172,8 +1172,8 @@ hotk->ledd_status = 0xFFF; /* Set initial values of light sensor and level */ - hotk->light_switch = 1; /* Default to light sensor disabled */ - hotk->light_level = 0; /* level 5 for sensor sensitivity */ + hotk->light_switch = 0; /* Default to light sensor disabled */ + hotk->light_level = 5; /* level 5 for sensor sensitivity */ if (ls_switch_handle) set_light_sens_switch(hotk->light_switch); --- linux-ec2-2.6.31.orig/drivers/platform/x86/thinkpad_acpi.c +++ linux-ec2-2.6.31/drivers/platform/x86/thinkpad_acpi.c @@ -3406,15 +3406,6 @@ #define TPACPI_RFK_BLUETOOTH_SW_NAME "tpacpi_bluetooth_sw" -static void bluetooth_suspend(pm_message_t state) -{ - /* Try to make sure radio will resume powered off */ - if (!acpi_evalf(NULL, NULL, "\\BLTH", "vd", - TP_ACPI_BLTH_PWR_OFF_ON_RESUME)) - vdbg_printk(TPACPI_DBG_RFKILL, - "bluetooth power down on resume request failed\n"); -} - static int bluetooth_get_status(void) { int status; @@ -3448,10 +3439,9 @@ #endif /* We make sure to keep TP_ACPI_BLUETOOTH_RESUMECTRL off */ + status = TP_ACPI_BLUETOOTH_RESUMECTRL; if (state == TPACPI_RFK_RADIO_ON) - status = TP_ACPI_BLUETOOTH_RADIOSSW; - else - status = 0; + status |= TP_ACPI_BLUETOOTH_RADIOSSW; if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status)) return -EIO; @@ -3590,7 +3580,6 @@ .read = bluetooth_read, .write = bluetooth_write, .exit = bluetooth_exit, - .suspend = bluetooth_suspend, .shutdown = bluetooth_shutdown, }; @@ -3608,15 +3597,6 @@ #define TPACPI_RFK_WWAN_SW_NAME "tpacpi_wwan_sw" -static void wan_suspend(pm_message_t state) -{ - /* Try to make sure radio will resume powered off */ - if (!acpi_evalf(NULL, NULL, "\\WGSV", "qvd", - TP_ACPI_WGSV_PWR_OFF_ON_RESUME)) - vdbg_printk(TPACPI_DBG_RFKILL, - "WWAN power down on resume request failed\n"); -} - static int wan_get_status(void) { int status; @@ -3649,11 +3629,10 @@ } #endif - /* We make sure to keep TP_ACPI_WANCARD_RESUMECTRL off */ + /* We make sure to set TP_ACPI_WANCARD_RESUMECTRL */ + status = TP_ACPI_WANCARD_RESUMECTRL; if (state == TPACPI_RFK_RADIO_ON) - status = TP_ACPI_WANCARD_RADIOSSW; - else - status = 0; + status |= TP_ACPI_WANCARD_RADIOSSW; if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status)) return -EIO; @@ -3791,7 +3770,6 @@ .read = wan_read, .write = wan_write, .exit = wan_exit, - .suspend = wan_suspend, .shutdown = wan_shutdown, }; @@ -5655,16 +5633,16 @@ /* Models with ATI GPUs known to require ECNVRAM mode */ TPACPI_Q_IBM('1', 'Y', TPACPI_BRGHT_Q_EC), /* T43/p ATI */ - /* Models with ATI GPUs (waiting confirmation) */ - TPACPI_Q_IBM('1', 'R', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), + /* Models with ATI GPUs that can use ECNVRAM */ + TPACPI_Q_IBM('1', 'R', TPACPI_BRGHT_Q_EC), TPACPI_Q_IBM('1', 'Q', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), TPACPI_Q_IBM('7', '6', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), TPACPI_Q_IBM('7', '8', 
TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), - /* Models with Intel Extreme Graphics 2 (waiting confirmation) */ - TPACPI_Q_IBM('1', 'V', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), - TPACPI_Q_IBM('1', 'W', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), - TPACPI_Q_IBM('1', 'U', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_NOEC), + /* Models with Intel Extreme Graphics 2 */ + TPACPI_Q_IBM('1', 'U', TPACPI_BRGHT_Q_NOEC), + TPACPI_Q_IBM('1', 'V', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), + TPACPI_Q_IBM('1', 'W', TPACPI_BRGHT_Q_ASK|TPACPI_BRGHT_Q_EC), /* Models with Intel GMA900 */ TPACPI_Q_IBM('7', '0', TPACPI_BRGHT_Q_NOEC), /* T43, R52 */ @@ -5863,7 +5841,7 @@ * Doing it this way makes the syscall restartable in case of EINTR */ rc = brightness_set(level); - return (rc == -EINTR)? ERESTARTSYS : rc; + return (rc == -EINTR)? -ERESTARTSYS : rc; } static struct ibm_struct brightness_driver_data = { --- linux-ec2-2.6.31.orig/drivers/platform/x86/dell-wmi.c +++ linux-ec2-2.6.31/drivers/platform/x86/dell-wmi.c @@ -40,6 +40,10 @@ MODULE_ALIAS("wmi:"DELL_EVENT_GUID); +/* Temporary workaround until the WMI sysfs interface goes in. + Borrowed from acer-wmi */ +MODULE_ALIAS("dmi:*:*Dell*:*:"); + struct key_entry { char type; /* See KE_* below */ u16 code; --- linux-ec2-2.6.31.orig/drivers/w1/w1_netlink.c +++ linux-ec2-2.6.31/drivers/w1/w1_netlink.c @@ -306,9 +306,8 @@ return error; } -static void w1_cn_callback(void *data) +static void w1_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) { - struct cn_msg *msg = data; struct w1_netlink_msg *m = (struct w1_netlink_msg *)(msg + 1); struct w1_netlink_cmd *cmd; struct w1_slave *sl; --- linux-ec2-2.6.31.orig/drivers/firewire/ohci.c +++ linux-ec2-2.6.31/drivers/firewire/ohci.c @@ -2180,6 +2180,13 @@ page = payload >> PAGE_SHIFT; offset = payload & ~PAGE_MASK; rest = p->payload_length; + /* + * The controllers I've tested have not worked correctly when + * second_req_count is zero. Rather than do something we know won't + * work, return an error + */ + if (rest == 0) + return -EINVAL; /* FIXME: make packet-per-buffer/dual-buffer a context option */ while (rest > 0) { @@ -2233,7 +2240,7 @@ unsigned long payload) { struct iso_context *ctx = container_of(base, struct iso_context, base); - struct descriptor *d = NULL, *pd = NULL; + struct descriptor *d, *pd; struct fw_iso_packet *p = packet; dma_addr_t d_bus, page_bus; u32 z, header_z, rest; @@ -2271,8 +2278,9 @@ d->data_address = cpu_to_le32(d_bus + (z * sizeof(*d))); rest = payload_per_buffer; + pd = d; for (j = 1; j < z; j++) { - pd = d + j; + pd++; pd->control = cpu_to_le16(DESCRIPTOR_STATUS | DESCRIPTOR_INPUT_MORE); --- linux-ec2-2.6.31.orig/drivers/pps/kapi.c +++ linux-ec2-2.6.31/drivers/pps/kapi.c @@ -271,6 +271,7 @@ { struct pps_device *pps; unsigned long flags; + int captured = 0; if ((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0) { printk(KERN_ERR "pps: unknown event (%x) for source %d\n", @@ -293,7 +294,8 @@ /* Check the event */ pps->current_mode = pps->params.mode; - if (event & PPS_CAPTUREASSERT) { + if ((event & PPS_CAPTUREASSERT) & + (pps->params.mode & PPS_CAPTUREASSERT)) { /* We have to add an offset? */ if (pps->params.mode & PPS_OFFSETASSERT) pps_add_offset(ts, &pps->params.assert_off_tu); @@ -303,8 +305,11 @@ pps->assert_sequence++; pr_debug("capture assert seq #%u for source %d\n", pps->assert_sequence, source); + + captured = ~0; } - if (event & PPS_CAPTURECLEAR) { + if ((event & PPS_CAPTURECLEAR) & + (pps->params.mode & PPS_CAPTURECLEAR)) { /* We have to add an offset? 
 */
		if (pps->params.mode & PPS_OFFSETCLEAR)
			pps_add_offset(ts, &pps->params.clear_off_tu);
@@ -314,12 +319,17 @@
		pps->clear_sequence++;
		pr_debug("capture clear seq #%u for source %d\n",
			pps->clear_sequence, source);
+
+		captured = ~0;
	}
-	pps->go = ~0;
-	wake_up_interruptible(&pps->queue);
+	/* Wake up iff we captured something */
+	if (captured) {
+		pps->go = ~0;
+		wake_up_interruptible(&pps->queue);
-	kill_fasync(&pps->async_queue, SIGIO, POLL_IN);
+		kill_fasync(&pps->async_queue, SIGIO, POLL_IN);
+	}
	spin_unlock_irqrestore(&pps->lock, flags);
--- linux-ec2-2.6.31.orig/drivers/pps/pps.c
+++ linux-ec2-2.6.31/drivers/pps/pps.c
@@ -71,9 +71,14 @@
	case PPS_GETPARAMS:
		pr_debug("PPS_GETPARAMS: source %d\n", pps->id);
-		/* Return current parameters */
-		err = copy_to_user(uarg, &pps->params,
-				sizeof(struct pps_kparams));
+		spin_lock_irq(&pps->lock);
+
+		/* Get the current parameters */
+		params = pps->params;
+
+		spin_unlock_irq(&pps->lock);
+
+		err = copy_to_user(uarg, &params, sizeof(struct pps_kparams));
		if (err)
			return -EFAULT;
--- linux-ec2-2.6.31.orig/drivers/staging/rt2860/common/cmm_data_2860.c
+++ linux-ec2-2.6.31/drivers/staging/rt2860/common/cmm_data_2860.c
@@ -363,6 +363,8 @@
	ULONG SwIdx = pAd->MgmtRing.TxCpuIdx;
	pTxD = (PTXD_STRUC) pAd->MgmtRing.Cell[SwIdx].AllocVa;
+	if (!pTxD)
+		return 0;
	pAd->MgmtRing.Cell[SwIdx].pNdisPacket = pPacket;
	pAd->MgmtRing.Cell[SwIdx].pNextNdisPacket = NULL;
--- linux-ec2-2.6.31.orig/drivers/staging/dst/dcore.c
+++ linux-ec2-2.6.31/drivers/staging/dst/dcore.c
@@ -846,15 +846,19 @@
/*
 * Configuration parser.
 */
-static void cn_dst_callback(void *data)
+static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
	struct dst_ctl *ctl;
-	struct cn_msg *msg = data;
	int err;
	struct dst_ctl_ack ack;
	struct dst_node *n = NULL, *tmp;
	unsigned int hash;
+	if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+
	if (msg->len < sizeof(struct dst_ctl)) {
		err = -EBADMSG;
		goto out;
--- linux-ec2-2.6.31.orig/drivers/staging/pohmelfs/config.c
+++ linux-ec2-2.6.31/drivers/staging/pohmelfs/config.c
@@ -446,11 +446,13 @@
	return err;
}
-static void pohmelfs_cn_callback(void *data)
+static void pohmelfs_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
{
-	struct cn_msg *msg = data;
	int err;
+	if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN))
+		return;
+
	switch (msg->flags) {
	case POHMELFS_FLAGS_ADD:
	case POHMELFS_FLAGS_DEL:
--- linux-ec2-2.6.31.orig/drivers/staging/rt2870/rt2870.h
+++ linux-ec2-2.6.31/drivers/staging/rt2870/rt2870.h
@@ -142,6 +142,7 @@
	{USB_DEVICE(0x0789,0x0163)}, /* Logitec */ \
	{USB_DEVICE(0x0789,0x0164)}, /* Logitec */ \
	{USB_DEVICE(0x7392,0x7717)}, /* Edimax */ \
+	{USB_DEVICE(0x1737,0x0071)}, /* Linksys */ \
	{ }/* Terminating entry */ \
}
#endif
--- linux-ec2-2.6.31.orig/drivers/staging/vt6655/ttype.h
+++ linux-ec2-2.6.31/drivers/staging/vt6655/ttype.h
@@ -27,6 +27,9 @@
#ifndef __TTYPE_H__
#define __TTYPE_H__
+#ifdef CONFIG_XEN
+#include
+#endif
/******* Common definitions and typedefs ***********************************/
--- linux-ec2-2.6.31.orig/drivers/ps3/ps3stor_lib.c
+++ linux-ec2-2.6.31/drivers/ps3/ps3stor_lib.c
@@ -23,6 +23,65 @@
#include
#include
+/*
+ * A workaround for flash memory I/O errors when the internal hard disk
+ * has not been formatted for OtherOS use. Delay disk close until flash
+ * memory is closed. 
+ */ + +static struct ps3_flash_workaround { + int flash_open; + int disk_open; + struct ps3_system_bus_device *disk_sbd; +} ps3_flash_workaround; + +static int ps3stor_open_hv_device(struct ps3_system_bus_device *sbd) +{ + int error = ps3_open_hv_device(sbd); + + if (error) + return error; + + if (sbd->match_id == PS3_MATCH_ID_STOR_FLASH) + ps3_flash_workaround.flash_open = 1; + + if (sbd->match_id == PS3_MATCH_ID_STOR_DISK) + ps3_flash_workaround.disk_open = 1; + + return 0; +} + +static int ps3stor_close_hv_device(struct ps3_system_bus_device *sbd) +{ + int error; + + if (sbd->match_id == PS3_MATCH_ID_STOR_DISK + && ps3_flash_workaround.disk_open + && ps3_flash_workaround.flash_open) { + ps3_flash_workaround.disk_sbd = sbd; + return 0; + } + + error = ps3_close_hv_device(sbd); + + if (error) + return error; + + if (sbd->match_id == PS3_MATCH_ID_STOR_DISK) + ps3_flash_workaround.disk_open = 0; + + if (sbd->match_id == PS3_MATCH_ID_STOR_FLASH) { + ps3_flash_workaround.flash_open = 0; + + if (ps3_flash_workaround.disk_sbd) { + ps3_close_hv_device(ps3_flash_workaround.disk_sbd); + ps3_flash_workaround.disk_open = 0; + ps3_flash_workaround.disk_sbd = NULL; + } + } + + return 0; +} static int ps3stor_probe_access(struct ps3_storage_device *dev) { @@ -90,7 +149,7 @@ int error, res, alignment; enum ps3_dma_page_size page_size; - error = ps3_open_hv_device(&dev->sbd); + error = ps3stor_open_hv_device(&dev->sbd); if (error) { dev_err(&dev->sbd.core, "%s:%u: ps3_open_hv_device failed %d\n", __func__, @@ -166,7 +225,7 @@ fail_sb_event_receive_port_destroy: ps3_sb_event_receive_port_destroy(&dev->sbd, dev->irq); fail_close_device: - ps3_close_hv_device(&dev->sbd); + ps3stor_close_hv_device(&dev->sbd); fail: return error; } @@ -193,7 +252,7 @@ "%s:%u: destroy event receive port failed %d\n", __func__, __LINE__, error); - error = ps3_close_hv_device(&dev->sbd); + error = ps3stor_close_hv_device(&dev->sbd); if (error) dev_err(&dev->sbd.core, "%s:%u: ps3_close_hv_device failed %d\n", __func__, --- linux-ec2-2.6.31.orig/drivers/input/xen-kbdfront.c +++ linux-ec2-2.6.31/drivers/input/xen-kbdfront.c @@ -325,7 +325,6 @@ static struct xenbus_driver xenkbd_driver = { .name = "vkbd", - .owner = THIS_MODULE, .ids = xenkbd_ids, .probe = xenkbd_probe, .remove = xenkbd_remove, --- linux-ec2-2.6.31.orig/drivers/input/input.c +++ linux-ec2-2.6.31/drivers/input/input.c @@ -88,19 +88,26 @@ */ static void input_pass_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) -{ - struct input_handle *handle; + +{ struct input_handle *handle; rcu_read_lock(); handle = rcu_dereference(dev->grab); - if (handle) + if (handle) { handle->handler->event(handle, type, code, value); - else - list_for_each_entry_rcu(handle, &dev->h_list, d_node) - if (handle->open) - handle->handler->event(handle, - type, code, value); + goto out; + } + + handle = rcu_dereference(dev->filter); + if (handle && handle->handler->filter(handle, type, code, value)) + goto out; + + list_for_each_entry_rcu(handle, &dev->h_list, d_node) + if (handle->open) + handle->handler->event(handle, + type, code, value); +out: rcu_read_unlock(); } @@ -375,12 +382,15 @@ } EXPORT_SYMBOL(input_grab_device); -static void __input_release_device(struct input_handle *handle) +static void __input_release_device(struct input_handle *handle, bool filter) { struct input_dev *dev = handle->dev; - if (dev->grab == handle) { - rcu_assign_pointer(dev->grab, NULL); + if (handle == (filter ? 
dev->filter : dev->grab)) {
+		if (filter)
+			rcu_assign_pointer(dev->filter, NULL);
+		else
+			rcu_assign_pointer(dev->grab, NULL);
		/* Make sure input_pass_event() notices that grab is gone */
		synchronize_rcu();
@@ -404,12 +414,65 @@
	struct input_dev *dev = handle->dev;
	mutex_lock(&dev->mutex);
-	__input_release_device(handle);
+	__input_release_device(handle, false);
	mutex_unlock(&dev->mutex);
}
EXPORT_SYMBOL(input_release_device);
/**
+ * input_filter_device - allow input events to be filtered from higher layers
+ * @handle: input handle that wants to filter the device
+ *
+ * When a device is filtered by an input handle all events generated by
+ * the device are passed to this handle. If the filter function returns
+ * true then the event is discarded rather than being passed to any other
+ * input handles, otherwise it is passed to them as normal. Grabs will be
+ * handled before filters, so a grabbed device will not deliver events to
+ * a filter function.
+ */
+int input_filter_device(struct input_handle *handle)
+{
+	struct input_dev *dev = handle->dev;
+	int retval;
+
+	retval = mutex_lock_interruptible(&dev->mutex);
+	if (retval)
+		return retval;
+
+	if (dev->filter) {
+		retval = -EBUSY;
+		goto out;
+	}
+
+	rcu_assign_pointer(dev->filter, handle);
+	synchronize_rcu();
+
+ out:
+	mutex_unlock(&dev->mutex);
+	return retval;
+}
+EXPORT_SYMBOL(input_filter_device);
+
+/**
+ * input_unfilter_device - removes a filter from a device
+ * @handle: input handle that owns the device
+ *
+ * Removes the filter from a device so that other input handles can
+ * start receiving unfiltered input events. Upon release all handlers
+ * attached to the device have their start() method called so they
+ * have a chance to synchronize device state with the rest of the
+ * system. 
+ */ +void input_unfilter_device(struct input_handle *handle) +{ + struct input_dev *dev = handle->dev; + + mutex_lock(&dev->mutex); + __input_release_device(handle, true); + mutex_unlock(&dev->mutex); +} +EXPORT_SYMBOL(input_unfilter_device); + +/** * input_open_device - open input device * @handle: handle through which device is being accessed * @@ -482,7 +545,9 @@ mutex_lock(&dev->mutex); - __input_release_device(handle); + /* Release both grabs and filters */ + __input_release_device(handle, false); + __input_release_device(handle, true); if (!--dev->users && dev->close) dev->close(dev); --- linux-ec2-2.6.31.orig/drivers/input/serio/i8042-x86ia64io.h +++ linux-ec2-2.6.31/drivers/input/serio/i8042-x86ia64io.h @@ -457,6 +457,34 @@ }, { } }; + +static struct dmi_system_id __initdata i8042_dmi_laptop_table[] = { + { + .ident = "Portable", + .matches = { + DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ + }, + }, + { + .ident = "Laptop", + .matches = { + DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /* Laptop */ + }, + }, + { + .ident = "Notebook", + .matches = { + DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */ + }, + }, + { + .ident = "Sub-Notebook", + .matches = { + DMI_MATCH(DMI_CHASSIS_TYPE, "14"), /* Sub-Notebook */ + }, + }, + { } +}; #endif /* @@ -735,6 +763,11 @@ i8042_kbd_irq = i8042_pnp_kbd_irq; i8042_aux_irq = i8042_pnp_aux_irq; +#ifdef CONFIG_X86 + i8042_bypass_aux_irq_test = !pnp_data_busted && + dmi_check_system(i8042_dmi_laptop_table); +#endif + return 0; } --- linux-ec2-2.6.31.orig/drivers/input/serio/i8042.c +++ linux-ec2-2.6.31/drivers/input/serio/i8042.c @@ -83,6 +83,8 @@ MODULE_PARM_DESC(debug, "Turn i8042 debugging mode on and off"); #endif +static bool i8042_bypass_aux_irq_test; + #include "i8042.h" static DEFINE_SPINLOCK(i8042_lock); @@ -641,7 +643,7 @@ * used it for a PCI card or somethig else. */ - if (i8042_noloop || aux_loop_broken) { + if (i8042_noloop || i8042_bypass_aux_irq_test || aux_loop_broken) { /* * Without LOOP command we can't test AUX IRQ delivery. Assume the port * is working and hope we are right. 
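The filter path added to input_pass_event() above is consumed the same way the dell-laptop hunk earlier in this patch does it: allocate a handle in ->connect(), open the device, then claim the filter slot with input_filter_device(). A minimal sketch of such a consumer follows; it is illustrative only, the demo_* names and the KEY_WLAN choice are arbitrary, and the empty .event is still required because open handles on h_list also receive unfiltered events.

	#include <linux/input.h>
	#include <linux/slab.h>

	static bool demo_filter(struct input_handle *handle, unsigned int type,
				unsigned int code, int value)
	{
		/* return true to swallow the event before other handlers see it */
		return type == EV_KEY && code == KEY_WLAN;
	}

	static void demo_event(struct input_handle *handle, unsigned int type,
			       unsigned int code, int value)
	{
	}

	static int demo_connect(struct input_handler *handler, struct input_dev *dev,
				const struct input_device_id *id)
	{
		struct input_handle *handle;
		int error;

		handle = kzalloc(sizeof(*handle), GFP_KERNEL);
		if (!handle)
			return -ENOMEM;

		handle->dev = dev;
		handle->handler = handler;
		handle->name = "demo-filter";

		error = input_register_handle(handle);
		if (error)
			goto err_free;
		error = input_open_device(handle);
		if (error)
			goto err_unregister;
		error = input_filter_device(handle);	/* claim dev->filter */
		if (error)
			goto err_close;
		return 0;

	err_close:
		input_close_device(handle);
	err_unregister:
		input_unregister_handle(handle);
	err_free:
		kfree(handle);
		return error;
	}

	static void demo_disconnect(struct input_handle *handle)
	{
		input_close_device(handle);
		input_unregister_handle(handle);
		kfree(handle);
	}

	static const struct input_device_id demo_ids[] = {
		{ .driver_info = 1 },	/* matches all devices */
		{ },
	};

	static struct input_handler demo_handler = {
		.name		= "demo-filter",
		.filter		= demo_filter,
		.event		= demo_event,
		.connect	= demo_connect,
		.disconnect	= demo_disconnect,
		.id_table	= demo_ids,
	};

	static int __init demo_init(void)
	{
		return input_register_handler(&demo_handler);
	}

Because only one filter handle per device is allowed (-EBUSY otherwise), a driver like dell-laptop narrows its id_table to the AT keyboard instead of matching everything.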
--- linux-ec2-2.6.31.orig/drivers/input/keyboard/atkbd.c
+++ linux-ec2-2.6.31/drivers/input/keyboard/atkbd.c
@@ -857,6 +857,14 @@
};
/*
+ * Dell Studio 1557 does not generate release keys for
+ * mute, volume up, & volume down
+ */
+static unsigned int atkbd_dell_studio_1557_forced_release_keys[] = {
+	0xa0, 0xae, 0xb0, -1U
+};
+
+/*
 * Perform fixup for HP system that doesn't generate release
 * for its video switch
 */
@@ -910,6 +918,13 @@
};
/*
+ * Amilo Si 1848 key release for Fn+Volume keys not working
+ */
+static unsigned int atkbd_amilo_si1848_forced_release_keys[] = {
+	0xa0, 0xae, 0xb0, -1U
+};
+
+/*
 * Amilo Xi 3650 key release for light touch bar not working
 */
static unsigned int atkbd_amilo_xi3650_forced_release_keys[] = {
@@ -917,6 +932,14 @@
};
/*
+ * Fujitsu Siemens system with broken key release on volume keys and mute key
+ */
+
+static unsigned int atkbd_amilo_xi_2428_forced_release_keys[] = {
+	0xa0, 0xae, 0xb0, -1U
+};
+
+/*
 * Soltech TA12 system with broken key release on volume keys and mute key
 */
static unsigned int atkdb_soltech_ta12_forced_release_keys[] = {
@@ -1527,6 +1550,15 @@
		.driver_data = atkbd_dell_laptop_forced_release_keys,
	},
	{
+		.ident = "Dell Studio 1557",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_dell_studio_1557_forced_release_keys,
+	},
+	{
		.ident = "HP 2133",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1608,6 +1640,24 @@
		.driver_data = atkbd_samsung_forced_release_keys,
	},
	{
+		.ident = "Samsung Q210/P210",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Q210/P210"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_samsung_forced_release_keys,
+	},
+	{
+		.ident = "Samsung R59P/R60P/R61P",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "R59P/R60P/R61P"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_samsung_forced_release_keys,
+	},
+	{
		.ident = "Fujitsu Amilo PA 1510",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
@@ -1617,6 +1667,15 @@
		.driver_data = atkbd_amilo_pa1510_forced_release_keys,
	},
	{
+		.ident = "Fujitsu Amilo Si 1848+u",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Si 1848+u"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_amilo_si1848_forced_release_keys,
+	},
+	{
		.ident = "Fujitsu Amilo Pi 3525",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
@@ -1635,6 +1694,43 @@
		.driver_data = atkbd_amilo_xi3650_forced_release_keys,
	},
	{
+		.ident = "Znote 6615WD",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Zepto"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Znote 6615WD"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_inventec_forced_release_keys,
+	},
+	{
+		.ident = "Znote 6625WD",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Zepto"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Znote"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "6625WD"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_inventec_forced_release_keys,
+	},
+	{
+		.ident = "AMILO Xi 2428",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Xi 2428"),
+		},
+		.callback = atkbd_setup_forced_release,
+		.driver_data = atkbd_amilo_xi_2428_forced_release_keys,
+	},
+	{
+		.ident = "Soltech Corporation TA12",
+		.matches = {
+			
DMI_MATCH(DMI_SYS_VENDOR, "Soltech Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "TA12"), + }, + .callback = atkbd_setup_forced_release, + .driver_data = atkdb_soltech_ta12_forced_release_keys, + }, + { .ident = "Soltech Corporation TA12", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Soltech Corporation"), --- linux-ec2-2.6.31.orig/drivers/input/mouse/alps.h +++ linux-ec2-2.6.31/drivers/input/mouse/alps.h @@ -23,6 +23,7 @@ char phys[32]; /* Phys */ const struct alps_model_info *i;/* Info */ int prev_fin; /* Finger bit from previous packet */ + struct timer_list timer; }; #ifdef CONFIG_MOUSE_PS2_ALPS --- linux-ec2-2.6.31.orig/drivers/input/mouse/synaptics.c +++ linux-ec2-2.6.31/drivers/input/mouse/synaptics.c @@ -652,6 +652,16 @@ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE M300"), }, + + }, + { + .ident = "Toshiba Portege M300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "Portable PC"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Version 1.0"), + }, + }, { } }; --- linux-ec2-2.6.31.orig/drivers/input/mouse/alps.c +++ linux-ec2-2.6.31/drivers/input/mouse/alps.c @@ -5,6 +5,7 @@ * Copyright (c) 2003-2005 Peter Osterlund * Copyright (c) 2004 Dmitry Torokhov * Copyright (c) 2005 Vojtech Pavlik + * Copyright (c) 2009 Sebastian Kapfer * * ALPS detection, tap switching and status querying info is taken from * tpconfig utility (by C. Scott Ananian and Bruce Kall). @@ -35,6 +36,8 @@ #define ALPS_OLDPROTO 0x10 #define ALPS_PASS 0x20 #define ALPS_FW_BK_2 0x40 +#define ALPS_PS2_INTERLEAVED 0x80 /* 3-byte PS/2 packet interleaved with + 6-byte ALPS packet */ static const struct alps_model_info alps_model_data[] = { { { 0x32, 0x02, 0x14 }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, /* Toshiba Salellite Pro M10 */ @@ -55,7 +58,9 @@ { { 0x20, 0x02, 0x0e }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, /* XXX */ { { 0x22, 0x02, 0x0a }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, { { 0x22, 0x02, 0x14 }, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT }, /* Dell Latitude D600 */ - { { 0x62, 0x02, 0x14 }, 0xcf, 0xcf, ALPS_PASS | ALPS_DUALPOINT }, /* Dell Latitude E6500 */ + /* Dell Latitude E5500, E6400, E6500, Precision M4400 */ + { { 0x62, 0x02, 0x14 }, 0xcf, 0xcf, + ALPS_PASS | ALPS_DUALPOINT | ALPS_PS2_INTERLEAVED }, { { 0x73, 0x02, 0x50 }, 0xcf, 0xcf, ALPS_FW_BK_1 }, /* Dell Vostro 1400 */ }; @@ -66,20 +71,88 @@ */ /* - * ALPS abolute Mode - new format + * PS/2 packet format + * + * byte 0: 0 0 YSGN XSGN 1 M R L + * byte 1: X7 X6 X5 X4 X3 X2 X1 X0 + * byte 2: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + * + * Note that the device never signals overflow condition. + * + * ALPS absolute Mode - new format * * byte 0: 1 ? ? ? 1 ? ? ? * byte 1: 0 x6 x5 x4 x3 x2 x1 x0 - * byte 2: 0 x10 x9 x8 x7 ? fin ges + * byte 2: 0 x10 x9 x8 x7 ? fin ges * byte 3: 0 y9 y8 y7 1 M R L * byte 4: 0 y6 y5 y4 y3 y2 y1 y0 * byte 5: 0 z6 z5 z4 z3 z2 z1 z0 * + * Dualpoint device -- interleaved packet format + * + * byte 0: 1 1 0 0 1 1 1 1 + * byte 1: 0 x6 x5 x4 x3 x2 x1 x0 + * byte 2: 0 x10 x9 x8 x7 0 fin ges + * byte 3: 0 0 YSGN XSGN 1 1 1 1 + * byte 4: X7 X6 X5 X4 X3 X2 X1 X0 + * byte 5: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + * byte 6: 0 y9 y8 y7 1 m r l + * byte 7: 0 y6 y5 y4 y3 y2 y1 y0 + * byte 8: 0 z6 z5 z4 z3 z2 z1 z0 + * + * CAPITALS = stick, miniscules = touchpad + * * ?'s can have different meanings on different models, * such as wheel rotation, extra buttons, stick buttons * on a dualpoint, etc. 
*/ +static bool alps_is_valid_first_byte(const struct alps_model_info *model, + unsigned char data) +{ + return (data & model->mask0) == model->byte0; +} + +static void alps_report_buttons(struct psmouse *psmouse, + struct input_dev *dev1, struct input_dev *dev2, + int left, int right, int middle) +{ + struct alps_data *priv = psmouse->private; + const struct alps_model_info *model = priv->i; + + if (model->flags & ALPS_PS2_INTERLEAVED) { + struct input_dev *dev; + + /* + * If shared button has already been reported on the + * other device (dev2) then this event should be also + * sent through that device. + */ + dev = test_bit(BTN_LEFT, dev2->key) ? dev2 : dev1; + input_report_key(dev, BTN_LEFT, left); + + dev = test_bit(BTN_RIGHT, dev2->key) ? dev2 : dev1; + input_report_key(dev, BTN_RIGHT, right); + + dev = test_bit(BTN_MIDDLE, dev2->key) ? dev2 : dev1; + input_report_key(dev, BTN_MIDDLE, middle); + + /* + * Sync the _other_ device now, we'll do the first + * device later once we report the rest of the events. + */ + input_sync(dev2); + } else { + /* + * For devices with non-interleaved packets we know what + * device buttons belong to so we can simply report them. + */ + input_report_key(dev1, BTN_LEFT, left); + input_report_key(dev1, BTN_RIGHT, right); + input_report_key(dev1, BTN_MIDDLE, middle); + } +} + static void alps_process_packet(struct psmouse *psmouse) { struct alps_data *priv = psmouse->private; @@ -89,18 +162,6 @@ int x, y, z, ges, fin, left, right, middle; int back = 0, forward = 0; - if ((packet[0] & 0xc8) == 0x08) { /* 3-byte PS/2 packet */ - input_report_key(dev2, BTN_LEFT, packet[0] & 1); - input_report_key(dev2, BTN_RIGHT, packet[0] & 2); - input_report_key(dev2, BTN_MIDDLE, packet[0] & 4); - input_report_rel(dev2, REL_X, - packet[1] ? packet[1] - ((packet[0] << 4) & 0x100) : 0); - input_report_rel(dev2, REL_Y, - packet[2] ? ((packet[0] << 3) & 0x100) - packet[2] : 0); - input_sync(dev2); - return; - } - if (priv->i->flags & ALPS_OLDPROTO) { left = packet[2] & 0x10; right = packet[2] & 0x08; @@ -136,18 +197,13 @@ input_report_rel(dev2, REL_X, (x > 383 ? (x - 768) : x)); input_report_rel(dev2, REL_Y, -(y > 255 ? (y - 512) : y)); - input_report_key(dev2, BTN_LEFT, left); - input_report_key(dev2, BTN_RIGHT, right); - input_report_key(dev2, BTN_MIDDLE, middle); + alps_report_buttons(psmouse, dev2, dev, left, right, middle); - input_sync(dev); input_sync(dev2); return; } - input_report_key(dev, BTN_LEFT, left); - input_report_key(dev, BTN_RIGHT, right); - input_report_key(dev, BTN_MIDDLE, middle); + alps_report_buttons(psmouse, dev, dev2, left, right, middle); /* Convert hardware tap to a reasonable Z value */ if (ges && !fin) z = 40; @@ -188,25 +244,168 @@ input_sync(dev); } +static void alps_report_bare_ps2_packet(struct psmouse *psmouse, + unsigned char packet[], + bool report_buttons) +{ + struct alps_data *priv = psmouse->private; + struct input_dev *dev2 = priv->dev2; + + if (report_buttons) + alps_report_buttons(psmouse, dev2, psmouse->dev, + packet[0] & 1, packet[0] & 2, packet[0] & 4); + + input_report_rel(dev2, REL_X, + packet[1] ? packet[1] - ((packet[0] << 4) & 0x100) : 0); + input_report_rel(dev2, REL_Y, + packet[2] ? 
((packet[0] << 3) & 0x100) - packet[2] : 0);
+
+	input_sync(dev2);
+}
+
+static psmouse_ret_t alps_handle_interleaved_ps2(struct psmouse *psmouse)
+{
+	struct alps_data *priv = psmouse->private;
+
+	if (psmouse->pktcnt < 6)
+		return PSMOUSE_GOOD_DATA;
+
+	if (psmouse->pktcnt == 6) {
+		/*
+		 * Start a timer to flush the packet if it ends up being
+		 * the last 6-byte packet in the stream. The timer needs
+		 * to fire before the psmouse core times out itself. 20 ms
+		 * should be enough to decide if we are getting more data
+		 * or not.
+		 */
+		mod_timer(&priv->timer, jiffies + msecs_to_jiffies(20));
+		return PSMOUSE_GOOD_DATA;
+	}
+
+	del_timer(&priv->timer);
+
+	if (psmouse->packet[6] & 0x80) {
+
+		/*
+		 * Highest bit is set - that means we either had a
+		 * complete ALPS packet and this is the start of the
+		 * next packet, or we got garbage.
+		 */
+
+		if (((psmouse->packet[3] |
+		      psmouse->packet[4] |
+		      psmouse->packet[5]) & 0x80) ||
+		    (!alps_is_valid_first_byte(priv->i, psmouse->packet[6]))) {
+			dbg("refusing packet %x %x %x %x "
+			    "(suspected interleaved ps/2)\n",
+			    psmouse->packet[3], psmouse->packet[4],
+			    psmouse->packet[5], psmouse->packet[6]);
+			return PSMOUSE_BAD_DATA;
+		}
+
+		alps_process_packet(psmouse);
+
+		/* Continue with the next packet */
+		psmouse->packet[0] = psmouse->packet[6];
+		psmouse->pktcnt = 1;
+
+	} else {
+
+		/*
+		 * High bit is 0 - that means that we indeed got a PS/2
+		 * packet in the middle of an ALPS packet.
+		 *
+		 * There is also a possibility that we got a 6-byte ALPS
+		 * packet followed by a 3-byte packet from the trackpoint.
+		 * We can not distinguish between these 2 scenarios, but
+		 * because the latter is unlikely to happen in the course
+		 * of normal operation (the user would need to press all
+		 * buttons on the pad and start moving the trackpoint
+		 * without touching the pad surface) we assume the former.
+		 * Even if we are wrong, the worst thing that would happen
+		 * is that the cursor would jump, but we should not get
+		 * protocol desynchronization.
+		 */
+
+		alps_report_bare_ps2_packet(psmouse, &psmouse->packet[3],
+					    false);
+
+		/*
+		 * Continue with the standard ALPS protocol handling,
+		 * but make sure we won't process it as an interleaved
+		 * packet again, which may happen if all buttons are
+		 * pressed. To avoid this let's reset the 4th bit which
+		 * is normally 1.
+		 */
+		psmouse->packet[3] = psmouse->packet[6] & 0xf7;
+		psmouse->pktcnt = 4;
+	}
+
+	return PSMOUSE_GOOD_DATA;
+}
+
+static void alps_flush_packet(unsigned long data)
+{
+	struct psmouse *psmouse = (struct psmouse *)data;
+
+	serio_pause_rx(psmouse->ps2dev.serio);
+
+	if (psmouse->pktcnt == 6) {
+
+		/*
+		 * We did not get any more data in a reasonable amount of
+		 * time. Validate the last 3 bytes and process as a standard
+		 * ALPS packet. 
+ */ + if ((psmouse->packet[3] | + psmouse->packet[4] | + psmouse->packet[5]) & 0x80) { + dbg("refusing packet %x %x %x " + "(suspected interleaved ps/2)\n", + psmouse->packet[3], psmouse->packet[4], + psmouse->packet[5]); + } else { + alps_process_packet(psmouse); + } + psmouse->pktcnt = 0; + } + + serio_continue_rx(psmouse->ps2dev.serio); +} + static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) { struct alps_data *priv = psmouse->private; + const struct alps_model_info *model = priv->i; if ((psmouse->packet[0] & 0xc8) == 0x08) { /* PS/2 packet */ if (psmouse->pktcnt == 3) { - alps_process_packet(psmouse); + alps_report_bare_ps2_packet(psmouse, psmouse->packet, + true); return PSMOUSE_FULL_PACKET; } return PSMOUSE_GOOD_DATA; } - if ((psmouse->packet[0] & priv->i->mask0) != priv->i->byte0) + /* Check for PS/2 packet stuffed in the middle of ALPS packet. */ + + if ((model->flags & ALPS_PS2_INTERLEAVED) && + psmouse->pktcnt >= 4 && (psmouse->packet[3] & 0x0f) == 0x0f) { + return alps_handle_interleaved_ps2(psmouse); + } + + if (!alps_is_valid_first_byte(model, psmouse->packet[0])) { + dbg("refusing packet[0] = %x (mask0 = %x, byte0 = %x)\n", + psmouse->packet[0], model->mask0, model->byte0); return PSMOUSE_BAD_DATA; + } /* Bytes 2 - 6 should have 0 in the highest bit */ if (psmouse->pktcnt >= 2 && psmouse->pktcnt <= 6 && - (psmouse->packet[psmouse->pktcnt - 1] & 0x80)) + (psmouse->packet[psmouse->pktcnt - 1] & 0x80)) { + dbg("refusing packet[%i] = %x\n", + psmouse->pktcnt - 1, psmouse->packet[psmouse->pktcnt - 1]); return PSMOUSE_BAD_DATA; + } if (psmouse->pktcnt == 6) { alps_process_packet(psmouse); @@ -428,7 +627,8 @@ static int alps_reconnect(struct psmouse *psmouse) { - psmouse_reset(psmouse); + /* UBUNTU: Causes lockups on resume */ + /* psmouse_reset(psmouse); */ if (alps_hw_init(psmouse, NULL)) return -1; @@ -441,6 +641,7 @@ struct alps_data *priv = psmouse->private; psmouse_reset(psmouse); + del_timer_sync(&priv->timer); input_unregister_device(priv->dev2); kfree(priv); } @@ -457,6 +658,8 @@ goto init_fail; priv->dev2 = dev2; + setup_timer(&priv->timer, alps_flush_packet, (unsigned long)psmouse); + psmouse->private = priv; if (alps_hw_init(psmouse, &version)) --- linux-ec2-2.6.31.orig/drivers/gpu/drm/drm_drv.c +++ linux-ec2-2.6.31/drivers/gpu/drm/drm_drv.c @@ -470,7 +470,9 @@ retcode = -EFAULT; goto err_i1; } - } + } else + memset(kdata, 0, _IOC_SIZE(cmd)); + retcode = func(dev, kdata, file_priv); if (cmd & IOC_OUT) { --- linux-ec2-2.6.31.orig/drivers/gpu/drm/drm_irq.c +++ linux-ec2-2.6.31/drivers/gpu/drm/drm_irq.c @@ -402,15 +402,21 @@ spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Going from 0->1 means we have to enable interrupts again */ - if (atomic_add_return(1, &dev->vblank_refcount[crtc]) == 1 && - !dev->vblank_enabled[crtc]) { - ret = dev->driver->enable_vblank(dev, crtc); - DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret); - if (ret) + if (atomic_add_return(1, &dev->vblank_refcount[crtc]) == 1) { + if (!dev->vblank_enabled[crtc]) { + ret = dev->driver->enable_vblank(dev, crtc); + DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret); + if (ret) + atomic_dec(&dev->vblank_refcount[crtc]); + else { + dev->vblank_enabled[crtc] = 1; + drm_update_vblank_count(dev, crtc); + } + } + } else { + if (!dev->vblank_enabled[crtc]) { atomic_dec(&dev->vblank_refcount[crtc]); - else { - dev->vblank_enabled[crtc] = 1; - drm_update_vblank_count(dev, crtc); + ret = -EINVAL; } } spin_unlock_irqrestore(&dev->vbl_lock, irqflags); @@ -437,6 +443,18 @@ } 
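
/*
 * A minimal standalone sketch (not part of the patch) of the decision
 * alps_handle_interleaved_ps2() above makes once seven bytes are buffered:
 * bit 7 of byte 6 distinguishes "bytes 3-5 completed an ALPS packet and a
 * new packet has started" from "a bare 3-byte PS/2 trackpoint packet was
 * stuffed into the middle of the ALPS packet". Names below are hypothetical.
 */
enum interleave_verdict { COMPLETE_ALPS, BARE_PS2, GARBAGE };

static enum interleave_verdict classify_seventh_byte(const unsigned char p[7],
						     unsigned char mask0,
						     unsigned char byte0)
{
	if (p[6] & 0x80) {
		/* Bytes 2-6 of a valid ALPS packet never have bit 7 set,
		 * and a packet's first byte must match mask0/byte0. */
		if (((p[3] | p[4] | p[5]) & 0x80) || (p[6] & mask0) != byte0)
			return GARBAGE;		/* drop, resynchronize */
		return COMPLETE_ALPS;	/* process p[0..5], restart at p[6] */
	}
	/* Bit 7 clear: p[3..5] was a bare PS/2 packet from the trackpoint. */
	return BARE_PS2;
}
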
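
/*
 * The drm_vblank_get() rework above is a refcounted-enable pattern: only
 * the 0->1 transition touches the hardware, and a failed enable must give
 * back the reference it just took, otherwise the count leaks and the
 * interrupt is never disabled again. A condensed sketch of the same
 * pattern (hypothetical helper, kernel-style C):
 */
static int refcounted_enable(atomic_t *refcount, bool *enabled,
			     int (*hw_enable)(void))
{
	int ret = 0;

	if (atomic_add_return(1, refcount) == 1) {
		if (!*enabled) {
			ret = hw_enable();
			if (ret)
				atomic_dec(refcount);	/* undo our reference */
			else
				*enabled = true;
		}
	} else if (!*enabled) {
		/* An earlier enable failed; refuse further references. */
		atomic_dec(refcount);
		ret = -EINVAL;
	}
	return ret;
}
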
EXPORT_SYMBOL(drm_vblank_put); +void drm_vblank_off(struct drm_device *dev, int crtc) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev->vbl_lock, irqflags); + DRM_WAKEUP(&dev->vbl_queue[crtc]); + dev->vblank_enabled[crtc] = 0; + dev->last_vblank[crtc] = dev->driver->get_vblank_counter(dev, crtc); + spin_unlock_irqrestore(&dev->vbl_lock, irqflags); +} +EXPORT_SYMBOL(drm_vblank_off); + /** * drm_vblank_pre_modeset - account for vblanks across mode sets * @dev: DRM device --- linux-ec2-2.6.31.orig/drivers/gpu/drm/drm_edid.c +++ linux-ec2-2.6.31/drivers/gpu/drm/drm_edid.c @@ -333,6 +333,12 @@ mode->vsync_end = mode->vsync_start + vsync_pulse_width; mode->vtotal = mode->vdisplay + vblank; + /* Some EDIDs have bogus h/vtotal values */ + if (mode->hsync_end > mode->htotal) + mode->htotal = mode->hsync_end + 1; + if (mode->vsync_end > mode->vtotal) + mode->vtotal = mode->vsync_end + 1; + drm_mode_set_name(mode); if (pt->misc & DRM_EDID_PT_INTERLACED) --- linux-ec2-2.6.31.orig/drivers/gpu/drm/Kconfig +++ linux-ec2-2.6.31/drivers/gpu/drm/Kconfig @@ -82,6 +82,7 @@ config DRM_I915 tristate "i915 driver" depends on AGP_INTEL + select SHMEM select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT --- linux-ec2-2.6.31.orig/drivers/gpu/drm/r128/r128_drv.h +++ linux-ec2-2.6.31/drivers/gpu/drm/r128/r128_drv.h @@ -422,6 +422,14 @@ * Misc helper macros */ +#define DEV_INIT_TEST_WITH_RETURN(_dev_priv) \ +do { \ + if (!_dev_priv) { \ + DRM_ERROR("called with no initialization\n"); \ + return -EINVAL; \ + } \ +} while (0) + #define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \ do { \ drm_r128_ring_buffer_t *ring = &dev_priv->ring; int i; \ --- linux-ec2-2.6.31.orig/drivers/gpu/drm/r128/r128_state.c +++ linux-ec2-2.6.31/drivers/gpu/drm/r128/r128_state.c @@ -1244,14 +1244,18 @@ static int r128_cce_clear(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_r128_private_t *dev_priv = dev->dev_private; - drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv; + drm_r128_sarea_t *sarea_priv; drm_r128_clear_t *clear = data; DRM_DEBUG("\n"); LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); + sarea_priv = dev_priv->sarea_priv; + if (sarea_priv->nbox > R128_NR_SAREA_CLIPRECTS) sarea_priv->nbox = R128_NR_SAREA_CLIPRECTS; @@ -1312,6 +1316,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); if (!dev_priv->page_flipping) @@ -1331,6 +1337,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); if (sarea_priv->nbox > R128_NR_SAREA_CLIPRECTS) @@ -1354,10 +1362,7 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n", DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard); @@ -1410,10 +1415,7 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("pid=%d buf=%d s=%d e=%d d=%d\n", DRM_CURRENTPID, elts->idx, elts->start, elts->end, elts->discard); @@ -1476,6 +1478,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + DRM_DEBUG("pid=%d index=%d\n", DRM_CURRENTPID, blit->idx); if (blit->idx < 0 || blit->idx >= dma->buf_count) { @@ -1501,6 +1505,8 @@ LOCK_TEST_WITH_RETURN(dev, 
file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); ret = -EINVAL; @@ -1531,6 +1537,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32))) return -EFAULT; @@ -1555,10 +1563,7 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("idx=%d s=%d e=%d d=%d\n", indirect->idx, indirect->start, indirect->end, @@ -1620,10 +1625,7 @@ drm_r128_getparam_t *param = data; int value; - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("pid=%d\n", DRM_CURRENTPID); --- linux-ec2-2.6.31.orig/drivers/gpu/drm/r128/r128_cce.c +++ linux-ec2-2.6.31/drivers/gpu/drm/r128/r128_cce.c @@ -353,6 +353,11 @@ DRM_DEBUG("\n"); + if (dev->dev_private) { + DRM_DEBUG("called when already initialized\n"); + return -EINVAL; + } + dev_priv = kzalloc(sizeof(drm_r128_private_t), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; @@ -649,6 +654,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + if (dev_priv->cce_running || dev_priv->cce_mode == R128_PM4_NONPM4) { DRM_DEBUG("while CCE running\n"); return 0; @@ -671,6 +678,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + /* Flush any pending CCE commands. This ensures any outstanding * commands are exectuted by the engine before we turn it off. */ @@ -708,10 +717,7 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_DEBUG("called before init done\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); r128_do_cce_reset(dev_priv); @@ -728,6 +734,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + if (dev_priv->cce_running) { r128_do_cce_flush(dev_priv); } @@ -741,6 +749,8 @@ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev->dev_private); + return r128_do_engine_reset(dev); } --- linux-ec2-2.6.31.orig/drivers/gpu/drm/radeon/radeon_atombios.c +++ linux-ec2-2.6.31/drivers/gpu/drm/radeon/radeon_atombios.c @@ -134,6 +134,14 @@ } } + /* HIS X1300 is DVI+VGA, not DVI+DVI */ + if ((dev->pdev->device == 0x7146) && + (dev->pdev->subsystem_vendor == 0x17af) && + (dev->pdev->subsystem_device == 0x2058)) { + if (supported_device == ATOM_DEVICE_DFP1_SUPPORT) + return false; + } + /* Funky macbooks */ if ((dev->pdev->device == 0x71C5) && (dev->pdev->subsystem_vendor == 0x106b) && --- linux-ec2-2.6.31.orig/drivers/gpu/drm/radeon/radeon_drv.c +++ linux-ec2-2.6.31/drivers/gpu/drm/radeon/radeon_drv.c @@ -328,8 +328,8 @@ #endif /* if enabled by default */ if (radeon_modeset == -1) { - DRM_INFO("radeon default to kernel modesetting.\n"); - radeon_modeset = 1; + DRM_INFO("radeon default to kernel modesetting DISABLED.\n"); + radeon_modeset = 0; } if (radeon_modeset == 1) { DRM_INFO("radeon kernel modesetting enabled.\n"); --- linux-ec2-2.6.31.orig/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ linux-ec2-2.6.31/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -291,8 +291,7 @@ uint32_t mask; if (radeon_crtc->crtc_id) - mask = (RADEON_CRTC2_EN | - RADEON_CRTC2_DISP_DIS | + mask = (RADEON_CRTC2_DISP_DIS | RADEON_CRTC2_VSYNC_DIS | RADEON_CRTC2_HSYNC_DIS | RADEON_CRTC2_DISP_REQ_EN_B); @@ -304,7 +303,7 @@ switch (mode) { case DRM_MODE_DPMS_ON: if (radeon_crtc->crtc_id) - WREG32_P(RADEON_CRTC2_GEN_CNTL, RADEON_CRTC2_EN, ~mask); 
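
/*
 * The DEV_INIT_TEST_WITH_RETURN() macro added to r128_drv.h above replaces
 * the open-coded "called with no initialization" checks in the r128 ioctl
 * handlers. A typical (hypothetical) handler using it starts like this:
 */
static int r128_example_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv)
{
	drm_r128_private_t *dev_priv = dev->dev_private;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	/* Logs "called with no initialization" and returns -EINVAL
	 * before any uninitialized state can be dereferenced. */
	DEV_INIT_TEST_WITH_RETURN(dev_priv);

	/* ... dev_priv is known to be non-NULL from here on ... */
	return 0;
}
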
+ WREG32_P(RADEON_CRTC2_GEN_CNTL, RADEON_CRTC2_EN, ~(RADEON_CRTC2_EN | mask)); else { WREG32_P(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_EN, ~(RADEON_CRTC_EN | RADEON_CRTC_DISP_REQ_EN_B)); @@ -318,7 +317,7 @@ case DRM_MODE_DPMS_OFF: drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id); if (radeon_crtc->crtc_id) - WREG32_P(RADEON_CRTC2_GEN_CNTL, mask, ~mask); + WREG32_P(RADEON_CRTC2_GEN_CNTL, mask, ~(RADEON_CRTC2_EN | mask)); else { WREG32_P(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_DISP_REQ_EN_B, ~(RADEON_CRTC_EN | RADEON_CRTC_DISP_REQ_EN_B)); --- linux-ec2-2.6.31.orig/drivers/gpu/drm/radeon/radeon_fb.c +++ linux-ec2-2.6.31/drivers/gpu/drm/radeon/radeon_fb.c @@ -120,7 +120,7 @@ struct drm_framebuffer *fb = &rfb->base; int depth; - if (var->pixclock == -1 || !var->pixclock) { + if (var->pixclock != 0) { return -EINVAL; } /* Need to resize the fb object !!! */ @@ -234,7 +234,7 @@ int ret; int i; - if (var->pixclock != -1) { + if (var->pixclock != 0) { DRM_ERROR("PIXEL CLCOK SET\n"); return -EINVAL; } @@ -828,7 +828,7 @@ rfbdev->crtc_count = crtc_count; if (new_fb) { - info->var.pixclock = -1; + info->var.pixclock = 0; if (register_framebuffer(info) < 0) return -EINVAL; } else { --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_display.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_display.c @@ -804,10 +804,8 @@ struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; intel_clock_t clock; - int max_n; - bool found; int err_most = 47; - found = false; + int err_min = 10000; /* eDP has only 2 clock choice, no n/m/p setting */ if (HAS_eDP) @@ -818,7 +816,7 @@ refclk, best_clock); if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { - if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) == + if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) clock.p2 = limit->p2.p2_fast; else @@ -831,16 +829,14 @@ } memset(best_clock, 0, sizeof(*best_clock)); - max_n = limit->n.max; - /* based on hardware requriment prefer smaller n to precision */ - for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) { - /* based on hardware requirment prefere larger m1,m2, p1 */ - for (clock.m1 = limit->m1.max; - clock.m1 >= limit->m1.min; clock.m1--) { - for (clock.m2 = limit->m2.max; - clock.m2 >= limit->m2.min; clock.m2--) { - for (clock.p1 = limit->p1.max; - clock.p1 >= limit->p1.min; clock.p1--) { + for (clock.p1 = limit->p1.max; clock.p1 >= limit->p1.min; clock.p1--) { + /* based on hardware requirement prefer smaller n to precision */ + for (clock.n = limit->n.min; clock.n <= limit->n.max; clock.n++) { + /* based on hardware requirement prefer larger m1,m2 */ + for (clock.m1 = limit->m1.max; + clock.m1 >= limit->m1.min; clock.m1--) { + for (clock.m2 = limit->m2.max; + clock.m2 >= limit->m2.min; clock.m2--) { int this_err; intel_clock(dev, refclk, &clock); @@ -849,18 +845,18 @@ this_err = abs((10000 - (target*10000/clock.dot))); if (this_err < err_most) { *best_clock = clock; - err_most = this_err; - max_n = clock.n; - found = true; /* found on first matching */ goto out; + } else if (this_err < err_min) { + *best_clock = clock; + err_min = this_err; } } } } } out: - return found; + return true; } /* DisplayPort has only two frequencies, 162MHz and 270MHz */ @@ -1008,6 +1004,10 @@ dspcntr &= ~DISPPLANE_TILED; } + if (IS_IGDNG(dev)) + /* must disable */ + dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + I915_WRITE(dspcntr_reg, dspcntr); Start = obj_priv->gtt_offset; @@ -1154,6 +1154,7 @@ int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF; int pf_ctl_reg = (pipe == 0) ? 
PFA_CTL_1 : PFB_CTL_1; int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ; + int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS; int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B; @@ -1177,6 +1178,15 @@ case DRM_MODE_DPMS_STANDBY: case DRM_MODE_DPMS_SUSPEND: DRM_DEBUG("crtc %d dpms on\n", pipe); + + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { + temp = I915_READ(PCH_LVDS); + if ((temp & LVDS_PORT_EN) == 0) { + I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN); + POSTING_READ(PCH_LVDS); + } + } + if (HAS_eDP) { /* enable eDP PLL */ igdng_enable_pll_edp(crtc); @@ -1205,6 +1215,19 @@ } } + /* Enable panel fitting for LVDS */ + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { + temp = I915_READ(pf_ctl_reg); + I915_WRITE(pf_ctl_reg, temp | PF_ENABLE | PF_FILTER_MED_3x3); + + /* currently full aspect */ + I915_WRITE(pf_win_pos, 0); + + I915_WRITE(pf_win_size, + (dev_priv->panel_fixed_mode->hdisplay << 16) | + (dev_priv->panel_fixed_mode->vdisplay)); + } + /* Enable CPU pipe */ temp = I915_READ(pipeconf_reg); if ((temp & PIPEACONF_ENABLE) == 0) { @@ -1348,8 +1371,6 @@ case DRM_MODE_DPMS_OFF: DRM_DEBUG("crtc %d dpms off\n", pipe); - i915_disable_vga(dev); - /* Disable display plane */ temp = I915_READ(dspcntr_reg); if ((temp & DISPLAY_PLANE_ENABLE) != 0) { @@ -1359,6 +1380,8 @@ I915_READ(dspbase_reg); } + i915_disable_vga(dev); + /* disable cpu pipe, disable after all planes disabled */ temp = I915_READ(pipeconf_reg); if ((temp & PIPEACONF_ENABLE) != 0) { @@ -1379,9 +1402,15 @@ } else DRM_DEBUG("crtc %d is disabled\n", pipe); - if (HAS_eDP) { - igdng_disable_pll_edp(crtc); + udelay(100); + + /* Disable PF */ + temp = I915_READ(pf_ctl_reg); + if ((temp & PF_ENABLE) != 0) { + I915_WRITE(pf_ctl_reg, temp & ~PF_ENABLE); + I915_READ(pf_ctl_reg); } + I915_WRITE(pf_win_size, 0); /* disable CPU FDI tx and PCH FDI rx */ temp = I915_READ(fdi_tx_reg); @@ -1407,6 +1436,13 @@ udelay(100); + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { + temp = I915_READ(PCH_LVDS); + I915_WRITE(PCH_LVDS, temp & ~LVDS_PORT_EN); + I915_READ(PCH_LVDS); + udelay(100); + } + /* disable PCH transcoder */ temp = I915_READ(transconf_reg); if ((temp & TRANS_ENABLE) != 0) { @@ -1426,6 +1462,8 @@ } } + udelay(100); + /* disable PCH DPLL */ temp = I915_READ(pch_dpll_reg); if ((temp & DPLL_VCO_ENABLE) != 0) { @@ -1433,14 +1471,20 @@ I915_READ(pch_dpll_reg); } - temp = I915_READ(fdi_rx_reg); - if ((temp & FDI_RX_PLL_ENABLE) != 0) { - temp &= ~FDI_SEL_PCDCLK; - temp &= ~FDI_RX_PLL_ENABLE; - I915_WRITE(fdi_rx_reg, temp); - I915_READ(fdi_rx_reg); + if (HAS_eDP) { + igdng_disable_pll_edp(crtc); } + temp = I915_READ(fdi_rx_reg); + temp &= ~FDI_SEL_PCDCLK; + I915_WRITE(fdi_rx_reg, temp); + I915_READ(fdi_rx_reg); + + temp = I915_READ(fdi_rx_reg); + temp &= ~FDI_RX_PLL_ENABLE; + I915_WRITE(fdi_rx_reg, temp); + I915_READ(fdi_rx_reg); + /* Disable CPU FDI TX PLL */ temp = I915_READ(fdi_tx_reg); if ((temp & FDI_TX_PLL_ENABLE) != 0) { @@ -1449,16 +1493,8 @@ udelay(100); } - /* Disable PF */ - temp = I915_READ(pf_ctl_reg); - if ((temp & PF_ENABLE) != 0) { - I915_WRITE(pf_ctl_reg, temp & ~PF_ENABLE); - I915_READ(pf_ctl_reg); - } - I915_WRITE(pf_win_size, 0); - /* Wait for the clocks to turn off. 
*/ - udelay(150); + udelay(100); break; } } @@ -1522,6 +1558,7 @@ intel_update_watermarks(dev); /* Give the overlay scaler a chance to disable if it's on this pipe */ //intel_crtc_dpms_video(crtc, FALSE); TODO + drm_vblank_off(dev, pipe); /* Disable the VGA plane that we never use */ i915_disable_vga(dev); @@ -1746,7 +1783,7 @@ #define LINK_N 0x80000 static void -igdng_compute_m_n(int bytes_per_pixel, int nlanes, +igdng_compute_m_n(int bits_per_pixel, int nlanes, int pixel_clock, int link_clock, struct fdi_m_n *m_n) { @@ -1756,7 +1793,8 @@ temp = (u64) DATA_N * pixel_clock; temp = div_u64(temp, link_clock); - m_n->gmch_m = div_u64(temp * bytes_per_pixel, nlanes); + m_n->gmch_m = div_u64(temp * bits_per_pixel, nlanes); + m_n->gmch_m >>= 3; /* convert to bytes_per_pixel */ m_n->gmch_n = DATA_N; fdi_reduce_ratio(&m_n->gmch_m, &m_n->gmch_n); @@ -1858,7 +1896,14 @@ { long entries_required, wm_size; - entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000; + /* + * Note: we need to make sure we don't overflow for various clock & + * latency values. + * clocks go from a few thousand to several hundred thousand. + * latency is usually a few thousand + */ + entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) / + 1000; entries_required /= wm->cacheline_size; DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required); @@ -2371,7 +2416,7 @@ /* FDI link */ if (IS_IGDNG(dev)) { - int lane, link_bw; + int lane, link_bw, bpp; /* eDP doesn't require FDI link, so just set DP M/N according to current link config */ if (is_edp) { @@ -2390,10 +2435,72 @@ lane = 4; link_bw = 270000; } - igdng_compute_m_n(3, lane, target_clock, + + /* determine panel color depth */ + temp = I915_READ(pipeconf_reg); + + switch (temp & PIPE_BPC_MASK) { + case PIPE_8BPC: + bpp = 24; + break; + case PIPE_10BPC: + bpp = 30; + break; + case PIPE_6BPC: + bpp = 18; + break; + case PIPE_12BPC: + bpp = 36; + break; + default: + DRM_ERROR("unknown pipe bpc value\n"); + bpp = 24; + } + + igdng_compute_m_n(bpp, lane, target_clock, link_bw, &m_n); } + /* Ironlake: try to setup display ref clock before DPLL + * enabling. This is only under driver's control after + * PCH B stepping, previous chipset stepping should be + * ignoring this setting. 
+ */ + if (IS_IGDNG(dev)) { + temp = I915_READ(PCH_DREF_CONTROL); + /* Always enable nonspread source */ + temp &= ~DREF_NONSPREAD_SOURCE_MASK; + temp |= DREF_NONSPREAD_SOURCE_ENABLE; + I915_WRITE(PCH_DREF_CONTROL, temp); + POSTING_READ(PCH_DREF_CONTROL); + + temp &= ~DREF_SSC_SOURCE_MASK; + temp |= DREF_SSC_SOURCE_ENABLE; + I915_WRITE(PCH_DREF_CONTROL, temp); + POSTING_READ(PCH_DREF_CONTROL); + + udelay(200); + + if (is_edp) { + if (dev_priv->lvds_use_ssc) { + temp |= DREF_SSC1_ENABLE; + I915_WRITE(PCH_DREF_CONTROL, temp); + POSTING_READ(PCH_DREF_CONTROL); + + udelay(200); + + temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; + temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD; + I915_WRITE(PCH_DREF_CONTROL, temp); + POSTING_READ(PCH_DREF_CONTROL); + } else { + temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD; + I915_WRITE(PCH_DREF_CONTROL, temp); + POSTING_READ(PCH_DREF_CONTROL); + } + } + } + if (IS_IGD(dev)) fp = (1 << clock.n) << 16 | clock.m1 << 8 | clock.m2; else @@ -2616,6 +2723,12 @@ intel_wait_for_vblank(dev); + if (IS_IGDNG(dev)) { + /* enable address swizzle for tiling buffer */ + temp = I915_READ(DISP_ARB_CTL); + I915_WRITE(DISP_ARB_CTL, temp | DISP_TILE_SURFACE_SWIZZLING); + } + I915_WRITE(dspcntr_reg, dspcntr); /* Flush the plane changes */ @@ -3231,7 +3344,7 @@ if (I915_READ(PCH_DP_D) & DP_DETECTED) intel_dp_init(dev, PCH_DP_D); - } else if (IS_I9XX(dev)) { + } else if (SUPPORTS_DIGITAL_OUTPUTS(dev)) { bool found = false; if (I915_READ(SDVOB) & SDVO_DETECTED) { @@ -3258,10 +3371,10 @@ if (SUPPORTS_INTEGRATED_DP(dev) && (I915_READ(DP_D) & DP_DETECTED)) intel_dp_init(dev, DP_D); - } else + } else if (IS_I8XX(dev)) intel_dvo_init(dev); - if (IS_I9XX(dev) && IS_MOBILE(dev) && !IS_IGDNG(dev)) + if (SUPPORTS_TV(dev)) intel_tv_init(dev); list_for_each_entry(connector, &dev->mode_config.connector_list, head) { --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_bios.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_bios.c @@ -217,6 +217,9 @@ if (IS_I85X(dev_priv->dev)) dev_priv->lvds_ssc_freq = general->ssc_freq ? 66 : 48; + else if (IS_IGDNG(dev_priv->dev)) + dev_priv->lvds_ssc_freq = + general->ssc_freq ? 100 : 120; else dev_priv->lvds_ssc_freq = general->ssc_freq ? 100 : 96; --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_gem.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_gem.c @@ -1151,27 +1151,21 @@ mutex_lock(&dev->struct_mutex); if (!obj_priv->gtt_space) { ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } - - ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + if (ret) + goto unlock; list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); + + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto unlock; } /* Need a new fence register? 
*/ if (obj_priv->tiling_mode != I915_TILING_NONE) { ret = i915_gem_object_get_fence_reg(obj); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + if (ret) + goto unlock; } pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + @@ -1179,18 +1173,18 @@ /* Finally, remap it using the new GTT offset */ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); - +unlock: mutex_unlock(&dev->struct_mutex); switch (ret) { + case 0: + case -ERESTARTSYS: + return VM_FAULT_NOPAGE; case -ENOMEM: case -EAGAIN: return VM_FAULT_OOM; - case -EFAULT: - case -EINVAL: - return VM_FAULT_SIGBUS; default: - return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } } @@ -2506,16 +2500,6 @@ if (obj_priv->pages == NULL) return; - /* XXX: The 865 in particular appears to be weird in how it handles - * cache flushing. We haven't figured it out, but the - * clflush+agp_chipset_flush doesn't appear to successfully get the - * data visible to the PGU, while wbinvd + agp_chipset_flush does. - */ - if (IS_I865G(obj->dev)) { - wbinvd(); - return; - } - drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } @@ -3007,6 +2991,16 @@ return -EINVAL; } + if (reloc->delta >= target_obj->size) { + DRM_ERROR("Relocation beyond target object bounds: " + "obj %p target %d delta %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->delta, (int) target_obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + if (reloc->write_domain & I915_GEM_DOMAIN_CPU || reloc->read_domains & I915_GEM_DOMAIN_CPU) { DRM_ERROR("reloc with read/write CPU domains: " @@ -3837,7 +3831,8 @@ i915_gem_object_unbind(obj); - i915_gem_free_mmap_offset(obj); + if (obj_priv->mmap_offset) + i915_gem_free_mmap_offset(obj); kfree(obj_priv->page_cpu_valid); kfree(obj_priv->bit_17); --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_irq.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_irq.c @@ -156,6 +156,20 @@ } /** + * intel_enable_asle - enable ASLE interrupt for OpRegion + */ +void intel_enable_asle (struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + + if (IS_IGDNG(dev)) + igdng_enable_display_irq(dev_priv, DE_GSE); + else + i915_enable_pipestat(dev_priv, 1, + I915_LEGACY_BLC_EVENT_ENABLE); +} + +/** * i915_pipe_enabled - check if a pipe is enabled * @dev: DRM device * @pipe: pipe to check @@ -253,40 +267,54 @@ { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; int ret = IRQ_NONE; - u32 de_iir, gt_iir; - u32 new_de_iir, new_gt_iir; + u32 de_iir, gt_iir, de_ier, pch_iir; struct drm_i915_master_private *master_priv; + /* disable master interrupt before clearing iir */ + de_ier = I915_READ(DEIER); + I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL); + (void)I915_READ(DEIER); + de_iir = I915_READ(DEIIR); gt_iir = I915_READ(GTIIR); + pch_iir = I915_READ(SDEIIR); - for (;;) { - if (de_iir == 0 && gt_iir == 0) - break; + if (de_iir == 0 && gt_iir == 0 && pch_iir == 0) + goto done; - ret = IRQ_HANDLED; + ret = IRQ_HANDLED; - I915_WRITE(DEIIR, de_iir); - new_de_iir = I915_READ(DEIIR); - I915_WRITE(GTIIR, gt_iir); - new_gt_iir = I915_READ(GTIIR); + if (dev->primary->master) { + master_priv = dev->primary->master->driver_priv; + if (master_priv->sarea_priv) + master_priv->sarea_priv->last_dispatch = + READ_BREADCRUMB(dev_priv); + } - if (dev->primary->master) { - master_priv = dev->primary->master->driver_priv; - if (master_priv->sarea_priv) - master_priv->sarea_priv->last_dispatch = - 
READ_BREADCRUMB(dev_priv); - } + if (gt_iir & GT_USER_INTERRUPT) { + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + DRM_WAKEUP(&dev_priv->irq_queue); + } - if (gt_iir & GT_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); - DRM_WAKEUP(&dev_priv->irq_queue); - } + if (de_iir & DE_GSE) + ironlake_opregion_gse_intr(dev); - de_iir = new_de_iir; - gt_iir = new_gt_iir; + /* check event from PCH */ + if ((de_iir & DE_PCH_EVENT) && + (pch_iir & SDE_HOTPLUG_MASK)) { + queue_work(dev_priv->wq, &dev_priv->hotplug_work); } + /* should clear PCH hotplug event before clearing CPU irq */ + I915_WRITE(SDEIIR, pch_iir); + I915_WRITE(GTIIR, gt_iir); + I915_WRITE(DEIIR, de_iir); + +done: + I915_WRITE(DEIER, de_ier); + (void)I915_READ(DEIER); + return ret; } @@ -877,14 +905,21 @@ I915_WRITE(GTIMR, 0xffffffff); I915_WRITE(GTIER, 0x0); (void) I915_READ(GTIER); + + /* south display irq */ + I915_WRITE(SDEIMR, 0xffffffff); + I915_WRITE(SDEIER, 0x0); + (void) I915_READ(SDEIER); } static int igdng_irq_postinstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; /* enable kind of interrupts always enabled */ - u32 display_mask = DE_MASTER_IRQ_CONTROL /*| DE_PCH_EVENT */; + u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT; u32 render_mask = GT_USER_INTERRUPT; + u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG | + SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG; dev_priv->irq_mask_reg = ~display_mask; dev_priv->de_irq_enable_reg = display_mask; @@ -904,6 +939,14 @@ I915_WRITE(GTIER, dev_priv->gt_irq_enable_reg); (void) I915_READ(GTIER); + dev_priv->pch_irq_mask_reg = ~hotplug_mask; + dev_priv->pch_irq_enable_reg = hotplug_mask; + + I915_WRITE(SDEIIR, I915_READ(SDEIIR)); + I915_WRITE(SDEIMR, dev_priv->pch_irq_mask_reg); + I915_WRITE(SDEIER, dev_priv->pch_irq_enable_reg); + (void) I915_READ(SDEIER); + return 0; } --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_drv.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_drv.c @@ -94,8 +94,6 @@ struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; - pci_set_power_state(dev->pdev, PCI_D0); - pci_restore_state(dev->pdev); if (pci_enable_device(dev->pdev)) return -1; pci_set_master(dev->pdev); @@ -263,6 +261,7 @@ module_init(i915_init); module_exit(i915_exit); +MODULE_IMPORT(intel_agp); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL and additional rights"); --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_gem_tiling.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -234,7 +234,13 @@ uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; bool need_disable; - if (!IS_I9XX(dev)) { + if (IS_IGDNG(dev)) { + /* On IGDNG, whatever the DRAM config, the GPU always does + * the same swizzling setup. + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if (!IS_I9XX(dev)) { /* As far as we know, the 865 doesn't have these bit 6 * swizzling issues. 
*/ @@ -317,13 +323,6 @@ } } - /* FIXME: check with memory config on IGDNG */ - if (IS_IGDNG(dev)) { - DRM_ERROR("disable tiling on IGDNG...\n"); - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - } - dev_priv->mm.bit_6_swizzle_x = swizzle_x; dev_priv->mm.bit_6_swizzle_y = swizzle_y; } --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_tv.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_tv.c @@ -1212,20 +1212,17 @@ tv_ctl |= TV_TRILEVEL_SYNC; if (tv_mode->pal_burst) tv_ctl |= TV_PAL_BURST; + scctl1 = 0; - /* dda1 implies valid video levels */ - if (tv_mode->dda1_inc) { + if (tv_mode->dda1_inc) scctl1 |= TV_SC_DDA1_EN; - } - if (tv_mode->dda2_inc) scctl1 |= TV_SC_DDA2_EN; - if (tv_mode->dda3_inc) scctl1 |= TV_SC_DDA3_EN; - scctl1 |= tv_mode->sc_reset; - scctl1 |= video_levels->burst << TV_BURST_LEVEL_SHIFT; + if (video_levels) + scctl1 |= video_levels->burst << TV_BURST_LEVEL_SHIFT; scctl1 |= tv_mode->dda1_inc << TV_SCDDA1_INC_SHIFT; scctl2 = tv_mode->dda2_size << TV_SCDDA2_SIZE_SHIFT | --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_dma.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_dma.c @@ -1257,9 +1257,7 @@ } /* Must be done after probing outputs */ - /* FIXME: verify on IGDNG */ - if (!IS_IGDNG(dev)) - intel_opregion_init(dev, 0); + intel_opregion_init(dev, 0); return 0; @@ -1297,8 +1295,7 @@ if (dev_priv->regs != NULL) iounmap(dev_priv->regs); - if (!IS_IGDNG(dev)) - intel_opregion_free(dev, 0); + intel_opregion_free(dev, 0); if (drm_core_check_feature(dev, DRIVER_MODESET)) { intel_modeset_cleanup(dev); --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_fb.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_fb.c @@ -114,7 +114,7 @@ struct drm_framebuffer *fb = &intel_fb->base; int depth; - if (var->pixclock == -1 || !var->pixclock) + if (var->pixclock != 0) return -EINVAL; /* Need to resize the fb object !!! */ @@ -205,7 +205,7 @@ DRM_DEBUG("%d %d\n", var->xres, var->pixclock); - if (var->pixclock != -1) { + if (var->pixclock != 0) { DRM_ERROR("PIXEL CLOCK SET\n"); return -EINVAL; @@ -461,7 +461,7 @@ mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_pin(fbo, PAGE_SIZE); + ret = i915_gem_object_pin(fbo, 64*1024); if (ret) { DRM_ERROR("failed to pin fb: %d\n", ret); goto out_unref; @@ -692,7 +692,7 @@ par->crtc_count = 1; if (new_fb) { - info->var.pixclock = -1; + info->var.pixclock = 0; if (register_framebuffer(info) < 0) return -EINVAL; } else @@ -846,7 +846,7 @@ par->crtc_count = crtc_count; if (new_fb) { - info->var.pixclock = -1; + info->var.pixclock = 0; if (register_framebuffer(info) < 0) return -EINVAL; } else --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_dp.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_dp.c @@ -400,7 +400,7 @@ { struct intel_dp_priv *dp_priv = intel_output->dev_priv; - DRM_ERROR("i2c_init %s\n", name); + DRM_INFO("i2c_init %s\n", name); dp_priv->algo.running = false; dp_priv->algo.address = 0; dp_priv->algo.aux_ch = intel_dp_i2c_aux_ch; --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_suspend.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_suspend.c @@ -32,11 +32,15 @@ static bool i915_pipe_enabled(struct drm_device *dev, enum pipe pipe) { struct drm_i915_private *dev_priv = dev->dev_private; + u32 dpll_reg; - if (pipe == PIPE_A) - return (I915_READ(DPLL_A) & DPLL_VCO_ENABLE); - else - return (I915_READ(DPLL_B) & DPLL_VCO_ENABLE); + if (IS_IGDNG(dev)) { + dpll_reg = (pipe == PIPE_A) ? PCH_DPLL_A: PCH_DPLL_B; + } else { + dpll_reg = (pipe == PIPE_A) ? 
DPLL_A: DPLL_B; + } + + return (I915_READ(dpll_reg) & DPLL_VCO_ENABLE); } static void i915_save_palette(struct drm_device *dev, enum pipe pipe) @@ -49,6 +53,9 @@ if (!i915_pipe_enabled(dev, pipe)) return; + if (IS_IGDNG(dev)) + reg = (pipe == PIPE_A) ? LGC_PALETTE_A : LGC_PALETTE_B; + if (pipe == PIPE_A) array = dev_priv->save_palette_a; else @@ -68,6 +75,9 @@ if (!i915_pipe_enabled(dev, pipe)) return; + if (IS_IGDNG(dev)) + reg = (pipe == PIPE_A) ? LGC_PALETTE_A : LGC_PALETTE_B; + if (pipe == PIPE_A) array = dev_priv->save_palette_a; else @@ -228,13 +238,25 @@ if (drm_core_check_feature(dev, DRIVER_MODESET)) return; + + if (IS_IGDNG(dev)) { + dev_priv->savePCH_DREF_CONTROL = I915_READ(PCH_DREF_CONTROL); + dev_priv->saveDISP_ARB_CTL = I915_READ(DISP_ARB_CTL); + } + /* Pipe & plane A info */ dev_priv->savePIPEACONF = I915_READ(PIPEACONF); dev_priv->savePIPEASRC = I915_READ(PIPEASRC); - dev_priv->saveFPA0 = I915_READ(FPA0); - dev_priv->saveFPA1 = I915_READ(FPA1); - dev_priv->saveDPLL_A = I915_READ(DPLL_A); - if (IS_I965G(dev)) + if (IS_IGDNG(dev)) { + dev_priv->saveFPA0 = I915_READ(PCH_FPA0); + dev_priv->saveFPA1 = I915_READ(PCH_FPA1); + dev_priv->saveDPLL_A = I915_READ(PCH_DPLL_A); + } else { + dev_priv->saveFPA0 = I915_READ(FPA0); + dev_priv->saveFPA1 = I915_READ(FPA1); + dev_priv->saveDPLL_A = I915_READ(DPLL_A); + } + if (IS_I965G(dev) && !IS_IGDNG(dev)) dev_priv->saveDPLL_A_MD = I915_READ(DPLL_A_MD); dev_priv->saveHTOTAL_A = I915_READ(HTOTAL_A); dev_priv->saveHBLANK_A = I915_READ(HBLANK_A); @@ -242,7 +264,30 @@ dev_priv->saveVTOTAL_A = I915_READ(VTOTAL_A); dev_priv->saveVBLANK_A = I915_READ(VBLANK_A); dev_priv->saveVSYNC_A = I915_READ(VSYNC_A); - dev_priv->saveBCLRPAT_A = I915_READ(BCLRPAT_A); + if (!IS_IGDNG(dev)) + dev_priv->saveBCLRPAT_A = I915_READ(BCLRPAT_A); + + if (IS_IGDNG(dev)) { + dev_priv->savePIPEA_DATA_M1 = I915_READ(PIPEA_DATA_M1); + dev_priv->savePIPEA_DATA_N1 = I915_READ(PIPEA_DATA_N1); + dev_priv->savePIPEA_LINK_M1 = I915_READ(PIPEA_LINK_M1); + dev_priv->savePIPEA_LINK_N1 = I915_READ(PIPEA_LINK_N1); + + dev_priv->saveFDI_TXA_CTL = I915_READ(FDI_TXA_CTL); + dev_priv->saveFDI_RXA_CTL = I915_READ(FDI_RXA_CTL); + + dev_priv->savePFA_CTL_1 = I915_READ(PFA_CTL_1); + dev_priv->savePFA_WIN_SZ = I915_READ(PFA_WIN_SZ); + dev_priv->savePFA_WIN_POS = I915_READ(PFA_WIN_POS); + + dev_priv->saveTRANSACONF = I915_READ(TRANSACONF); + dev_priv->saveTRANS_HTOTAL_A = I915_READ(TRANS_HTOTAL_A); + dev_priv->saveTRANS_HBLANK_A = I915_READ(TRANS_HBLANK_A); + dev_priv->saveTRANS_HSYNC_A = I915_READ(TRANS_HSYNC_A); + dev_priv->saveTRANS_VTOTAL_A = I915_READ(TRANS_VTOTAL_A); + dev_priv->saveTRANS_VBLANK_A = I915_READ(TRANS_VBLANK_A); + dev_priv->saveTRANS_VSYNC_A = I915_READ(TRANS_VSYNC_A); + } dev_priv->saveDSPACNTR = I915_READ(DSPACNTR); dev_priv->saveDSPASTRIDE = I915_READ(DSPASTRIDE); @@ -259,10 +304,16 @@ /* Pipe & plane B info */ dev_priv->savePIPEBCONF = I915_READ(PIPEBCONF); dev_priv->savePIPEBSRC = I915_READ(PIPEBSRC); - dev_priv->saveFPB0 = I915_READ(FPB0); - dev_priv->saveFPB1 = I915_READ(FPB1); - dev_priv->saveDPLL_B = I915_READ(DPLL_B); - if (IS_I965G(dev)) + if (IS_IGDNG(dev)) { + dev_priv->saveFPB0 = I915_READ(PCH_FPB0); + dev_priv->saveFPB1 = I915_READ(PCH_FPB1); + dev_priv->saveDPLL_B = I915_READ(PCH_DPLL_B); + } else { + dev_priv->saveFPB0 = I915_READ(FPB0); + dev_priv->saveFPB1 = I915_READ(FPB1); + dev_priv->saveDPLL_B = I915_READ(DPLL_B); + } + if (IS_I965G(dev) && !IS_IGDNG(dev)) dev_priv->saveDPLL_B_MD = I915_READ(DPLL_B_MD); dev_priv->saveHTOTAL_B = I915_READ(HTOTAL_B); 
dev_priv->saveHBLANK_B = I915_READ(HBLANK_B); @@ -270,7 +321,30 @@ dev_priv->saveVTOTAL_B = I915_READ(VTOTAL_B); dev_priv->saveVBLANK_B = I915_READ(VBLANK_B); dev_priv->saveVSYNC_B = I915_READ(VSYNC_B); - dev_priv->saveBCLRPAT_A = I915_READ(BCLRPAT_A); + if (!IS_IGDNG(dev)) + dev_priv->saveBCLRPAT_B = I915_READ(BCLRPAT_B); + + if (IS_IGDNG(dev)) { + dev_priv->savePIPEB_DATA_M1 = I915_READ(PIPEB_DATA_M1); + dev_priv->savePIPEB_DATA_N1 = I915_READ(PIPEB_DATA_N1); + dev_priv->savePIPEB_LINK_M1 = I915_READ(PIPEB_LINK_M1); + dev_priv->savePIPEB_LINK_N1 = I915_READ(PIPEB_LINK_N1); + + dev_priv->saveFDI_TXB_CTL = I915_READ(FDI_TXB_CTL); + dev_priv->saveFDI_RXB_CTL = I915_READ(FDI_RXB_CTL); + + dev_priv->savePFB_CTL_1 = I915_READ(PFB_CTL_1); + dev_priv->savePFB_WIN_SZ = I915_READ(PFB_WIN_SZ); + dev_priv->savePFB_WIN_POS = I915_READ(PFB_WIN_POS); + + dev_priv->saveTRANSBCONF = I915_READ(TRANSBCONF); + dev_priv->saveTRANS_HTOTAL_B = I915_READ(TRANS_HTOTAL_B); + dev_priv->saveTRANS_HBLANK_B = I915_READ(TRANS_HBLANK_B); + dev_priv->saveTRANS_HSYNC_B = I915_READ(TRANS_HSYNC_B); + dev_priv->saveTRANS_VTOTAL_B = I915_READ(TRANS_VTOTAL_B); + dev_priv->saveTRANS_VBLANK_B = I915_READ(TRANS_VBLANK_B); + dev_priv->saveTRANS_VSYNC_B = I915_READ(TRANS_VSYNC_B); + } dev_priv->saveDSPBCNTR = I915_READ(DSPBCNTR); dev_priv->saveDSPBSTRIDE = I915_READ(DSPBSTRIDE); @@ -285,26 +359,50 @@ dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT); return; } + static void i915_restore_modeset_reg(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + int dpll_a_reg, fpa0_reg, fpa1_reg; + int dpll_b_reg, fpb0_reg, fpb1_reg; if (drm_core_check_feature(dev, DRIVER_MODESET)) return; + if (IS_IGDNG(dev)) { + dpll_a_reg = PCH_DPLL_A; + dpll_b_reg = PCH_DPLL_B; + fpa0_reg = PCH_FPA0; + fpb0_reg = PCH_FPB0; + fpa1_reg = PCH_FPA1; + fpb1_reg = PCH_FPB1; + } else { + dpll_a_reg = DPLL_A; + dpll_b_reg = DPLL_B; + fpa0_reg = FPA0; + fpb0_reg = FPB0; + fpa1_reg = FPA1; + fpb1_reg = FPB1; + } + + if (IS_IGDNG(dev)) { + I915_WRITE(PCH_DREF_CONTROL, dev_priv->savePCH_DREF_CONTROL); + I915_WRITE(DISP_ARB_CTL, dev_priv->saveDISP_ARB_CTL); + } + /* Pipe & plane A info */ /* Prime the clock */ if (dev_priv->saveDPLL_A & DPLL_VCO_ENABLE) { - I915_WRITE(DPLL_A, dev_priv->saveDPLL_A & + I915_WRITE(dpll_a_reg, dev_priv->saveDPLL_A & ~DPLL_VCO_ENABLE); DRM_UDELAY(150); } - I915_WRITE(FPA0, dev_priv->saveFPA0); - I915_WRITE(FPA1, dev_priv->saveFPA1); + I915_WRITE(fpa0_reg, dev_priv->saveFPA0); + I915_WRITE(fpa1_reg, dev_priv->saveFPA1); /* Actually enable it */ - I915_WRITE(DPLL_A, dev_priv->saveDPLL_A); + I915_WRITE(dpll_a_reg, dev_priv->saveDPLL_A); DRM_UDELAY(150); - if (IS_I965G(dev)) + if (IS_I965G(dev) && !IS_IGDNG(dev)) I915_WRITE(DPLL_A_MD, dev_priv->saveDPLL_A_MD); DRM_UDELAY(150); @@ -315,7 +413,30 @@ I915_WRITE(VTOTAL_A, dev_priv->saveVTOTAL_A); I915_WRITE(VBLANK_A, dev_priv->saveVBLANK_A); I915_WRITE(VSYNC_A, dev_priv->saveVSYNC_A); - I915_WRITE(BCLRPAT_A, dev_priv->saveBCLRPAT_A); + if (!IS_IGDNG(dev)) + I915_WRITE(BCLRPAT_A, dev_priv->saveBCLRPAT_A); + + if (IS_IGDNG(dev)) { + I915_WRITE(PIPEA_DATA_M1, dev_priv->savePIPEA_DATA_M1); + I915_WRITE(PIPEA_DATA_N1, dev_priv->savePIPEA_DATA_N1); + I915_WRITE(PIPEA_LINK_M1, dev_priv->savePIPEA_LINK_M1); + I915_WRITE(PIPEA_LINK_N1, dev_priv->savePIPEA_LINK_N1); + + I915_WRITE(FDI_RXA_CTL, dev_priv->saveFDI_RXA_CTL); + I915_WRITE(FDI_TXA_CTL, dev_priv->saveFDI_TXA_CTL); + + I915_WRITE(PFA_CTL_1, dev_priv->savePFA_CTL_1); + I915_WRITE(PFA_WIN_SZ, dev_priv->savePFA_WIN_SZ); + 
I915_WRITE(PFA_WIN_POS, dev_priv->savePFA_WIN_POS); + + I915_WRITE(TRANSACONF, dev_priv->saveTRANSACONF); + I915_WRITE(TRANS_HTOTAL_A, dev_priv->saveTRANS_HTOTAL_A); + I915_WRITE(TRANS_HBLANK_A, dev_priv->saveTRANS_HBLANK_A); + I915_WRITE(TRANS_HSYNC_A, dev_priv->saveTRANS_HSYNC_A); + I915_WRITE(TRANS_VTOTAL_A, dev_priv->saveTRANS_VTOTAL_A); + I915_WRITE(TRANS_VBLANK_A, dev_priv->saveTRANS_VBLANK_A); + I915_WRITE(TRANS_VSYNC_A, dev_priv->saveTRANS_VSYNC_A); + } /* Restore plane info */ I915_WRITE(DSPASIZE, dev_priv->saveDSPASIZE); @@ -337,16 +458,16 @@ /* Pipe & plane B info */ if (dev_priv->saveDPLL_B & DPLL_VCO_ENABLE) { - I915_WRITE(DPLL_B, dev_priv->saveDPLL_B & + I915_WRITE(dpll_b_reg, dev_priv->saveDPLL_B & ~DPLL_VCO_ENABLE); DRM_UDELAY(150); } - I915_WRITE(FPB0, dev_priv->saveFPB0); - I915_WRITE(FPB1, dev_priv->saveFPB1); + I915_WRITE(fpb0_reg, dev_priv->saveFPB0); + I915_WRITE(fpb1_reg, dev_priv->saveFPB1); /* Actually enable it */ - I915_WRITE(DPLL_B, dev_priv->saveDPLL_B); + I915_WRITE(dpll_b_reg, dev_priv->saveDPLL_B); DRM_UDELAY(150); - if (IS_I965G(dev)) + if (IS_I965G(dev) && !IS_IGDNG(dev)) I915_WRITE(DPLL_B_MD, dev_priv->saveDPLL_B_MD); DRM_UDELAY(150); @@ -357,7 +478,30 @@ I915_WRITE(VTOTAL_B, dev_priv->saveVTOTAL_B); I915_WRITE(VBLANK_B, dev_priv->saveVBLANK_B); I915_WRITE(VSYNC_B, dev_priv->saveVSYNC_B); - I915_WRITE(BCLRPAT_B, dev_priv->saveBCLRPAT_B); + if (!IS_IGDNG(dev)) + I915_WRITE(BCLRPAT_B, dev_priv->saveBCLRPAT_B); + + if (IS_IGDNG(dev)) { + I915_WRITE(PIPEB_DATA_M1, dev_priv->savePIPEB_DATA_M1); + I915_WRITE(PIPEB_DATA_N1, dev_priv->savePIPEB_DATA_N1); + I915_WRITE(PIPEB_LINK_M1, dev_priv->savePIPEB_LINK_M1); + I915_WRITE(PIPEB_LINK_N1, dev_priv->savePIPEB_LINK_N1); + + I915_WRITE(FDI_RXB_CTL, dev_priv->saveFDI_RXB_CTL); + I915_WRITE(FDI_TXB_CTL, dev_priv->saveFDI_TXB_CTL); + + I915_WRITE(PFB_CTL_1, dev_priv->savePFB_CTL_1); + I915_WRITE(PFB_WIN_SZ, dev_priv->savePFB_WIN_SZ); + I915_WRITE(PFB_WIN_POS, dev_priv->savePFB_WIN_POS); + + I915_WRITE(TRANSBCONF, dev_priv->saveTRANSBCONF); + I915_WRITE(TRANS_HTOTAL_B, dev_priv->saveTRANS_HTOTAL_B); + I915_WRITE(TRANS_HBLANK_B, dev_priv->saveTRANS_HBLANK_B); + I915_WRITE(TRANS_HSYNC_B, dev_priv->saveTRANS_HSYNC_B); + I915_WRITE(TRANS_VTOTAL_B, dev_priv->saveTRANS_VTOTAL_B); + I915_WRITE(TRANS_VBLANK_B, dev_priv->saveTRANS_VBLANK_B); + I915_WRITE(TRANS_VSYNC_B, dev_priv->saveTRANS_VSYNC_B); + } /* Restore plane info */ I915_WRITE(DSPBSIZE, dev_priv->saveDSPBSIZE); @@ -379,19 +523,10 @@ return; } -int i915_save_state(struct drm_device *dev) + +void i915_save_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); - - /* Render Standby */ - if (IS_I965G(dev) && IS_MOBILE(dev)) - dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); - - /* Hardware status page */ - dev_priv->saveHWS = I915_READ(HWS_PGA); /* Display arbitration control */ dev_priv->saveDSPARB = I915_READ(DSPARB); @@ -399,6 +534,7 @@ /* This is only meaningful in non-KMS mode */ /* Don't save them in KMS mode */ i915_save_modeset_reg(dev); + /* Cursor state */ dev_priv->saveCURACNTR = I915_READ(CURACNTR); dev_priv->saveCURAPOS = I915_READ(CURAPOS); @@ -410,21 +546,43 @@ dev_priv->saveCURSIZE = I915_READ(CURSIZE); /* CRT state */ - dev_priv->saveADPA = I915_READ(ADPA); + if (IS_IGDNG(dev)) { + dev_priv->saveADPA = I915_READ(PCH_ADPA); + } else { + dev_priv->saveADPA = I915_READ(ADPA); + } /* LVDS state */ - dev_priv->savePP_CONTROL = 
I915_READ(PP_CONTROL); - dev_priv->savePFIT_PGM_RATIOS = I915_READ(PFIT_PGM_RATIOS); - dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL); - if (IS_I965G(dev)) - dev_priv->saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_CTL2); - if (IS_MOBILE(dev) && !IS_I830(dev)) - dev_priv->saveLVDS = I915_READ(LVDS); - if (!IS_I830(dev) && !IS_845G(dev)) + if (IS_IGDNG(dev)) { + dev_priv->savePP_CONTROL = I915_READ(PCH_PP_CONTROL); + dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_PCH_CTL1); + dev_priv->saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_PCH_CTL2); + dev_priv->saveBLC_CPU_PWM_CTL = I915_READ(BLC_PWM_CPU_CTL); + dev_priv->saveBLC_CPU_PWM_CTL2 = I915_READ(BLC_PWM_CPU_CTL2); + dev_priv->saveLVDS = I915_READ(PCH_LVDS); + } else { + dev_priv->savePP_CONTROL = I915_READ(PP_CONTROL); + dev_priv->savePFIT_PGM_RATIOS = I915_READ(PFIT_PGM_RATIOS); + dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL); + dev_priv->saveBLC_HIST_CTL = I915_READ(BLC_HIST_CTL); + if (IS_I965G(dev)) + dev_priv->saveBLC_PWM_CTL2 = I915_READ(BLC_PWM_CTL2); + if (IS_MOBILE(dev) && !IS_I830(dev)) + dev_priv->saveLVDS = I915_READ(LVDS); + } + + if (!IS_I830(dev) && !IS_845G(dev) && !IS_IGDNG(dev)) dev_priv->savePFIT_CONTROL = I915_READ(PFIT_CONTROL); - dev_priv->savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS); - dev_priv->savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS); - dev_priv->savePP_DIVISOR = I915_READ(PP_DIVISOR); + + if (IS_IGDNG(dev)) { + dev_priv->savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS); + dev_priv->savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS); + dev_priv->savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR); + } else { + dev_priv->savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS); + dev_priv->savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS); + dev_priv->savePP_DIVISOR = I915_READ(PP_DIVISOR); + } /* Display Port state */ if (SUPPORTS_INTEGRATED_DP(dev)) { @@ -448,16 +606,143 @@ dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2); dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL); - /* Interrupt state */ - dev_priv->saveIIR = I915_READ(IIR); - dev_priv->saveIER = I915_READ(IER); - dev_priv->saveIMR = I915_READ(IMR); - /* VGA state */ dev_priv->saveVGA0 = I915_READ(VGA0); dev_priv->saveVGA1 = I915_READ(VGA1); dev_priv->saveVGA_PD = I915_READ(VGA_PD); - dev_priv->saveVGACNTRL = I915_READ(VGACNTRL); + if (IS_IGDNG(dev)) + dev_priv->saveVGACNTRL = I915_READ(CPU_VGACNTRL); + else + dev_priv->saveVGACNTRL = I915_READ(VGACNTRL); + + i915_save_vga(dev); +} + +void i915_restore_display(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Display arbitration */ + I915_WRITE(DSPARB, dev_priv->saveDSPARB); + + /* Display port ratios (must be done before clock is set) */ + if (SUPPORTS_INTEGRATED_DP(dev)) { + I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M); + I915_WRITE(PIPEB_GMCH_DATA_M, dev_priv->savePIPEB_GMCH_DATA_M); + I915_WRITE(PIPEA_GMCH_DATA_N, dev_priv->savePIPEA_GMCH_DATA_N); + I915_WRITE(PIPEB_GMCH_DATA_N, dev_priv->savePIPEB_GMCH_DATA_N); + I915_WRITE(PIPEA_DP_LINK_M, dev_priv->savePIPEA_DP_LINK_M); + I915_WRITE(PIPEB_DP_LINK_M, dev_priv->savePIPEB_DP_LINK_M); + I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N); + I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N); + } + + /* This is only meaningful in non-KMS mode */ + /* Don't restore them in KMS mode */ + i915_restore_modeset_reg(dev); + + /* Cursor state */ + I915_WRITE(CURAPOS, dev_priv->saveCURAPOS); + I915_WRITE(CURACNTR, dev_priv->saveCURACNTR); + I915_WRITE(CURABASE, dev_priv->saveCURABASE); + I915_WRITE(CURBPOS, 
dev_priv->saveCURBPOS); + I915_WRITE(CURBCNTR, dev_priv->saveCURBCNTR); + I915_WRITE(CURBBASE, dev_priv->saveCURBBASE); + if (!IS_I9XX(dev)) + I915_WRITE(CURSIZE, dev_priv->saveCURSIZE); + + /* CRT state */ + if (IS_IGDNG(dev)) + I915_WRITE(PCH_ADPA, dev_priv->saveADPA); + else + I915_WRITE(ADPA, dev_priv->saveADPA); + + /* LVDS state */ + if (IS_I965G(dev) && !IS_IGDNG(dev)) + I915_WRITE(BLC_PWM_CTL2, dev_priv->saveBLC_PWM_CTL2); + + if (IS_IGDNG(dev)) { + I915_WRITE(PCH_LVDS, dev_priv->saveLVDS); + } else if (IS_MOBILE(dev) && !IS_I830(dev)) + I915_WRITE(LVDS, dev_priv->saveLVDS); + + if (!IS_I830(dev) && !IS_845G(dev) && !IS_IGDNG(dev)) + I915_WRITE(PFIT_CONTROL, dev_priv->savePFIT_CONTROL); + + if (IS_IGDNG(dev)) { + I915_WRITE(BLC_PWM_PCH_CTL1, dev_priv->saveBLC_PWM_CTL); + I915_WRITE(BLC_PWM_PCH_CTL2, dev_priv->saveBLC_PWM_CTL2); + I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL); + I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->saveBLC_CPU_PWM_CTL2); + I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS); + I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS); + I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR); + I915_WRITE(PCH_PP_CONTROL, dev_priv->savePP_CONTROL); + } else { + I915_WRITE(PFIT_PGM_RATIOS, dev_priv->savePFIT_PGM_RATIOS); + I915_WRITE(BLC_PWM_CTL, dev_priv->saveBLC_PWM_CTL); + I915_WRITE(BLC_HIST_CTL, dev_priv->saveBLC_HIST_CTL); + I915_WRITE(PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS); + I915_WRITE(PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS); + I915_WRITE(PP_DIVISOR, dev_priv->savePP_DIVISOR); + I915_WRITE(PP_CONTROL, dev_priv->savePP_CONTROL); + } + + /* Display Port state */ + if (SUPPORTS_INTEGRATED_DP(dev)) { + I915_WRITE(DP_B, dev_priv->saveDP_B); + I915_WRITE(DP_C, dev_priv->saveDP_C); + I915_WRITE(DP_D, dev_priv->saveDP_D); + } + /* FIXME: restore TV & SDVO state */ + + /* FBC info */ + I915_WRITE(FBC_CFB_BASE, dev_priv->saveFBC_CFB_BASE); + I915_WRITE(FBC_LL_BASE, dev_priv->saveFBC_LL_BASE); + I915_WRITE(FBC_CONTROL2, dev_priv->saveFBC_CONTROL2); + I915_WRITE(FBC_CONTROL, dev_priv->saveFBC_CONTROL); + + /* VGA state */ + if (IS_IGDNG(dev)) + I915_WRITE(CPU_VGACNTRL, dev_priv->saveVGACNTRL); + else + I915_WRITE(VGACNTRL, dev_priv->saveVGACNTRL); + I915_WRITE(VGA0, dev_priv->saveVGA0); + I915_WRITE(VGA1, dev_priv->saveVGA1); + I915_WRITE(VGA_PD, dev_priv->saveVGA_PD); + DRM_UDELAY(150); + + i915_restore_vga(dev); +} + +int i915_save_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int i; + + pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); + + /* Render Standby */ + if (IS_I965G(dev) && IS_MOBILE(dev)) + dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); + + /* Hardware status page */ + dev_priv->saveHWS = I915_READ(HWS_PGA); + + i915_save_display(dev); + + /* Interrupt state */ + if (IS_IGDNG(dev)) { + dev_priv->saveDEIER = I915_READ(DEIER); + dev_priv->saveDEIMR = I915_READ(DEIMR); + dev_priv->saveGTIER = I915_READ(GTIER); + dev_priv->saveGTIMR = I915_READ(GTIMR); + dev_priv->saveFDI_RXA_IMR = I915_READ(FDI_RXA_IMR); + dev_priv->saveFDI_RXB_IMR = I915_READ(FDI_RXB_IMR); + } else { + dev_priv->saveIER = I915_READ(IER); + dev_priv->saveIMR = I915_READ(IMR); + } /* Clock gating state */ dev_priv->saveD_STATE = I915_READ(D_STATE); @@ -489,7 +774,6 @@ for (i = 0; i < 8; i++) dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4)); } - i915_save_vga(dev); return 0; } @@ -508,9 +792,6 @@ /* Hardware status page */ I915_WRITE(HWS_PGA, dev_priv->saveHWS); - /* Display arbitration */ 
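
/*
 * The save/restore rework above splits the display side out of
 * i915_save_state()/i915_restore_state() into i915_save_display() and
 * i915_restore_display(), and every display access now picks the CPU or
 * PCH register bank at run time. A condensed sketch of that selection
 * pattern, using the CRT DAC control register as in the patch
 * (hypothetical helper):
 */
static void i915_restore_crt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (IS_IGDNG(dev))	/* Ironlake: ADPA lives on the PCH */
		I915_WRITE(PCH_ADPA, dev_priv->saveADPA);
	else
		I915_WRITE(ADPA, dev_priv->saveADPA);
}
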
- I915_WRITE(DSPARB, dev_priv->saveDSPARB); - /* Fences */ if (IS_I965G(dev)) { for (i = 0; i < 16; i++) @@ -522,69 +803,21 @@ for (i = 0; i < 8; i++) I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]); } - - /* Display port ratios (must be done before clock is set) */ - if (SUPPORTS_INTEGRATED_DP(dev)) { - I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M); - I915_WRITE(PIPEB_GMCH_DATA_M, dev_priv->savePIPEB_GMCH_DATA_M); - I915_WRITE(PIPEA_GMCH_DATA_N, dev_priv->savePIPEA_GMCH_DATA_N); - I915_WRITE(PIPEB_GMCH_DATA_N, dev_priv->savePIPEB_GMCH_DATA_N); - I915_WRITE(PIPEA_DP_LINK_M, dev_priv->savePIPEA_DP_LINK_M); - I915_WRITE(PIPEB_DP_LINK_M, dev_priv->savePIPEB_DP_LINK_M); - I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N); - I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N); - } - /* This is only meaningful in non-KMS mode */ - /* Don't restore them in KMS mode */ - i915_restore_modeset_reg(dev); - /* Cursor state */ - I915_WRITE(CURAPOS, dev_priv->saveCURAPOS); - I915_WRITE(CURACNTR, dev_priv->saveCURACNTR); - I915_WRITE(CURABASE, dev_priv->saveCURABASE); - I915_WRITE(CURBPOS, dev_priv->saveCURBPOS); - I915_WRITE(CURBCNTR, dev_priv->saveCURBCNTR); - I915_WRITE(CURBBASE, dev_priv->saveCURBBASE); - if (!IS_I9XX(dev)) - I915_WRITE(CURSIZE, dev_priv->saveCURSIZE); - /* CRT state */ - I915_WRITE(ADPA, dev_priv->saveADPA); + i915_restore_display(dev); - /* LVDS state */ - if (IS_I965G(dev)) - I915_WRITE(BLC_PWM_CTL2, dev_priv->saveBLC_PWM_CTL2); - if (IS_MOBILE(dev) && !IS_I830(dev)) - I915_WRITE(LVDS, dev_priv->saveLVDS); - if (!IS_I830(dev) && !IS_845G(dev)) - I915_WRITE(PFIT_CONTROL, dev_priv->savePFIT_CONTROL); - - I915_WRITE(PFIT_PGM_RATIOS, dev_priv->savePFIT_PGM_RATIOS); - I915_WRITE(BLC_PWM_CTL, dev_priv->saveBLC_PWM_CTL); - I915_WRITE(PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS); - I915_WRITE(PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS); - I915_WRITE(PP_DIVISOR, dev_priv->savePP_DIVISOR); - I915_WRITE(PP_CONTROL, dev_priv->savePP_CONTROL); - - /* Display Port state */ - if (SUPPORTS_INTEGRATED_DP(dev)) { - I915_WRITE(DP_B, dev_priv->saveDP_B); - I915_WRITE(DP_C, dev_priv->saveDP_C); - I915_WRITE(DP_D, dev_priv->saveDP_D); + /* Interrupt state */ + if (IS_IGDNG(dev)) { + I915_WRITE(DEIER, dev_priv->saveDEIER); + I915_WRITE(DEIMR, dev_priv->saveDEIMR); + I915_WRITE(GTIER, dev_priv->saveGTIER); + I915_WRITE(GTIMR, dev_priv->saveGTIMR); + I915_WRITE(FDI_RXA_IMR, dev_priv->saveFDI_RXA_IMR); + I915_WRITE(FDI_RXB_IMR, dev_priv->saveFDI_RXB_IMR); + } else { + I915_WRITE (IER, dev_priv->saveIER); + I915_WRITE (IMR, dev_priv->saveIMR); } - /* FIXME: restore TV & SDVO state */ - - /* FBC info */ - I915_WRITE(FBC_CFB_BASE, dev_priv->saveFBC_CFB_BASE); - I915_WRITE(FBC_LL_BASE, dev_priv->saveFBC_LL_BASE); - I915_WRITE(FBC_CONTROL2, dev_priv->saveFBC_CONTROL2); - I915_WRITE(FBC_CONTROL, dev_priv->saveFBC_CONTROL); - - /* VGA state */ - I915_WRITE(VGACNTRL, dev_priv->saveVGACNTRL); - I915_WRITE(VGA0, dev_priv->saveVGA0); - I915_WRITE(VGA1, dev_priv->saveVGA1); - I915_WRITE(VGA_PD, dev_priv->saveVGA_PD); - DRM_UDELAY(150); /* Clock gating state */ I915_WRITE (D_STATE, dev_priv->saveD_STATE); @@ -603,8 +836,6 @@ for (i = 0; i < 3; i++) I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]); - i915_restore_vga(dev); - return 0; } --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_lvds.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_lvds.c @@ -305,6 +305,10 @@ goto out; } + /* full screen scale for now */ + if (IS_IGDNG(dev)) + goto out; + /* 965+ wants fuzzy 
fitting */ if (IS_I965G(dev)) pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) | @@ -332,8 +336,10 @@ * to register description and PRM. * Change the value here to see the borders for debugging */ - I915_WRITE(BCLRPAT_A, 0); - I915_WRITE(BCLRPAT_B, 0); + if (!IS_IGDNG(dev)) { + I915_WRITE(BCLRPAT_A, 0); + I915_WRITE(BCLRPAT_B, 0); + } switch (lvds_priv->fitting_mode) { case DRM_MODE_SCALE_NO_SCALE: @@ -582,7 +588,6 @@ * settings. */ - /* No panel fitting yet, fixme */ if (IS_IGDNG(dev)) return; --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_drv.h +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_drv.h @@ -188,6 +188,8 @@ u32 gt_irq_mask_reg; u32 gt_irq_enable_reg; u32 de_irq_enable_reg; + u32 pch_irq_mask_reg; + u32 pch_irq_enable_reg; u32 hotplug_supported_mask; struct work_struct hotplug_work; @@ -256,6 +258,13 @@ u32 saveVBLANK_A; u32 saveVSYNC_A; u32 saveBCLRPAT_A; + u32 saveTRANSACONF; + u32 saveTRANS_HTOTAL_A; + u32 saveTRANS_HBLANK_A; + u32 saveTRANS_HSYNC_A; + u32 saveTRANS_VTOTAL_A; + u32 saveTRANS_VBLANK_A; + u32 saveTRANS_VSYNC_A; u32 savePIPEASTAT; u32 saveDSPASTRIDE; u32 saveDSPASIZE; @@ -264,8 +273,11 @@ u32 saveDSPASURF; u32 saveDSPATILEOFF; u32 savePFIT_PGM_RATIOS; + u32 saveBLC_HIST_CTL; u32 saveBLC_PWM_CTL; u32 saveBLC_PWM_CTL2; + u32 saveBLC_CPU_PWM_CTL; + u32 saveBLC_CPU_PWM_CTL2; u32 saveFPB0; u32 saveFPB1; u32 saveDPLL_B; @@ -277,6 +289,13 @@ u32 saveVBLANK_B; u32 saveVSYNC_B; u32 saveBCLRPAT_B; + u32 saveTRANSBCONF; + u32 saveTRANS_HTOTAL_B; + u32 saveTRANS_HBLANK_B; + u32 saveTRANS_HSYNC_B; + u32 saveTRANS_VTOTAL_B; + u32 saveTRANS_VBLANK_B; + u32 saveTRANS_VSYNC_B; u32 savePIPEBSTAT; u32 saveDSPBSTRIDE; u32 saveDSPBSIZE; @@ -309,6 +328,12 @@ u32 saveIER; u32 saveIIR; u32 saveIMR; + u32 saveDEIER; + u32 saveDEIMR; + u32 saveGTIER; + u32 saveGTIMR; + u32 saveFDI_RXA_IMR; + u32 saveFDI_RXB_IMR; u32 saveCACHE_MODE_0; u32 saveD_STATE; u32 saveCG_2D_DIS; @@ -342,6 +367,26 @@ u32 savePIPEB_DP_LINK_M; u32 savePIPEA_DP_LINK_N; u32 savePIPEB_DP_LINK_N; + u32 saveFDI_RXA_CTL; + u32 saveFDI_TXA_CTL; + u32 saveFDI_RXB_CTL; + u32 saveFDI_TXB_CTL; + u32 savePFA_CTL_1; + u32 savePFB_CTL_1; + u32 savePFA_WIN_SZ; + u32 savePFB_WIN_SZ; + u32 savePFA_WIN_POS; + u32 savePFB_WIN_POS; + u32 savePCH_DREF_CONTROL; + u32 saveDISP_ARB_CTL; + u32 savePIPEA_DATA_M1; + u32 savePIPEA_DATA_N1; + u32 savePIPEA_LINK_M1; + u32 savePIPEA_LINK_N1; + u32 savePIPEB_DATA_M1; + u32 savePIPEB_DATA_N1; + u32 savePIPEB_LINK_M1; + u32 savePIPEB_LINK_N1; struct { struct drm_mm gtt_space; @@ -576,6 +621,8 @@ extern int i915_max_ioctl; extern unsigned int i915_fbpercrtc; +extern void i915_save_display(struct drm_device *dev); +extern void i915_restore_display(struct drm_device *dev); extern int i915_master_create(struct drm_device *dev, struct drm_master *master); extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master); @@ -627,6 +674,8 @@ void i915_disable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask); +void intel_enable_asle (struct drm_device *dev); + /* i915_mem.c */ extern int i915_mem_alloc(struct drm_device *dev, void *data, @@ -746,11 +795,13 @@ extern int intel_opregion_init(struct drm_device *dev, int resume); extern void intel_opregion_free(struct drm_device *dev, int suspend); extern void opregion_asle_intr(struct drm_device *dev); +extern void ironlake_opregion_gse_intr(struct drm_device *dev); extern void opregion_enable_asle(struct drm_device *dev); #else static inline int intel_opregion_init(struct drm_device *dev, int resume) { return 0; } static 
inline void intel_opregion_free(struct drm_device *dev, int suspend) { return; } static inline void opregion_asle_intr(struct drm_device *dev) { return; } +static inline void ironlake_opregion_gse_intr(struct drm_device *dev) { return; } static inline void opregion_enable_asle(struct drm_device *dev) { return; } #endif @@ -837,6 +888,7 @@ #define IS_I85X(dev) ((dev)->pci_device == 0x3582) #define IS_I855(dev) ((dev)->pci_device == 0x3582) #define IS_I865G(dev) ((dev)->pci_device == 0x2572) +#define IS_I8XX(dev) (IS_I830(dev) || IS_845G(dev) || IS_I85X(dev) || IS_I865G(dev)) #define IS_I915G(dev) ((dev)->pci_device == 0x2582 || (dev)->pci_device == 0x258a) #define IS_I915GM(dev) ((dev)->pci_device == 0x2592) @@ -854,6 +906,7 @@ (dev)->pci_device == 0x2E12 || \ (dev)->pci_device == 0x2E22 || \ (dev)->pci_device == 0x2E32 || \ + (dev)->pci_device == 0x2E42 || \ (dev)->pci_device == 0x0042 || \ (dev)->pci_device == 0x0046) @@ -866,6 +919,7 @@ (dev)->pci_device == 0x2E12 || \ (dev)->pci_device == 0x2E22 || \ (dev)->pci_device == 0x2E32 || \ + (dev)->pci_device == 0x2E42 || \ IS_GM45(dev)) #define IS_IGDG(dev) ((dev)->pci_device == 0xa001) @@ -896,9 +950,12 @@ */ #define HAS_128_BYTE_Y_TILING(dev) (IS_I9XX(dev) && !(IS_I915G(dev) || \ IS_I915GM(dev))) +#define SUPPORTS_DIGITAL_OUTPUTS(dev) (IS_I9XX(dev) && !IS_IGD(dev)) #define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev)) +#define SUPPORTS_TV(dev) (IS_I9XX(dev) && IS_MOBILE(dev) && \ + !IS_IGDNG(dev) && !IS_IGD(dev)) #define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev)) /* dsparb controlled by hw only */ #define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev)) --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_reg.h +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_reg.h @@ -915,6 +915,8 @@ #define BACKLIGHT_DUTY_CYCLE_SHIFT (0) #define BACKLIGHT_DUTY_CYCLE_MASK (0xffff) +#define BLC_HIST_CTL 0x61260 + /* TV port control */ #define TV_CTL 0x68000 /** Enables the TV encoder */ @@ -1616,6 +1618,11 @@ #define PIPE_START_VBLANK_INTERRUPT_STATUS (1UL<<2) /* 965 or later */ #define PIPE_VBLANK_INTERRUPT_STATUS (1UL<<1) #define PIPE_OVERLAY_UPDATED_STATUS (1UL<<0) +#define PIPE_BPC_MASK (7 << 5) /* Ironlake */ +#define PIPE_8BPC (0 << 5) +#define PIPE_10BPC (1 << 5) +#define PIPE_6BPC (2 << 5) +#define PIPE_12BPC (3 << 5) #define DSPARB 0x70030 #define DSPARB_CSTART_MASK (0x7f << 7) @@ -1733,6 +1740,7 @@ #define DISPPLANE_NO_LINE_DOUBLE 0 #define DISPPLANE_STEREO_POLARITY_FIRST 0 #define DISPPLANE_STEREO_POLARITY_SECOND (1<<18) +#define DISPPLANE_TRICKLE_FEED_DISABLE (1<<14) /* IGDNG */ #define DISPPLANE_TILED (1<<10) #define DSPAADDR 0x70184 #define DSPASTRIDE 0x70188 @@ -1865,8 +1873,15 @@ #define PFA_CTL_1 0x68080 #define PFB_CTL_1 0x68880 #define PF_ENABLE (1<<31) +#define PF_FILTER_MASK (3<<23) +#define PF_FILTER_PROGRAMMED (0<<23) +#define PF_FILTER_MED_3x3 (1<<23) +#define PF_FILTER_EDGE_ENHANCE (2<<23) +#define PF_FILTER_EDGE_SOFTEN (3<<23) #define PFA_WIN_SZ 0x68074 #define PFB_WIN_SZ 0x68874 +#define PFA_WIN_POS 0x68070 +#define PFB_WIN_POS 0x68870 /* legacy palette */ #define LGC_PALETTE_A 0x4a000 @@ -1913,6 +1928,9 @@ #define GTIIR 0x44018 #define GTIER 0x4401c +#define DISP_ARB_CTL 0x45000 +#define DISP_TILE_SURFACE_SWIZZLING (1<<13) + /* PCH */ /* south display engine interrupt */ @@ -1921,6 +1939,7 @@ #define SDE_PORTC_HOTPLUG (1 << 9) #define SDE_PORTB_HOTPLUG (1 << 8) #define 
SDE_SDVOB_HOTPLUG (1 << 6) +#define SDE_HOTPLUG_MASK (0xf << 8) #define SDEISR 0xc4000 #define SDEIMR 0xc4004 @@ -1979,11 +1998,11 @@ #define DREF_CPU_SOURCE_OUTPUT_MASK (3<<13) #define DREF_SSC_SOURCE_DISABLE (0<<11) #define DREF_SSC_SOURCE_ENABLE (2<<11) -#define DREF_SSC_SOURCE_MASK (2<<11) +#define DREF_SSC_SOURCE_MASK (3<<11) #define DREF_NONSPREAD_SOURCE_DISABLE (0<<9) #define DREF_NONSPREAD_CK505_ENABLE (1<<9) #define DREF_NONSPREAD_SOURCE_ENABLE (2<<9) -#define DREF_NONSPREAD_SOURCE_MASK (2<<9) +#define DREF_NONSPREAD_SOURCE_MASK (3<<9) #define DREF_SUPERSPREAD_SOURCE_DISABLE (0<<7) #define DREF_SUPERSPREAD_SOURCE_ENABLE (2<<7) #define DREF_SSC4_DOWNSPREAD (0<<6) --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/i915_opregion.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/i915_opregion.c @@ -32,6 +32,8 @@ #include "i915_drm.h" #include "i915_drv.h" +#define I915_DRV "i915_drv" + #define PCI_ASLE 0xe4 #define PCI_LBPC 0xf4 #define PCI_ASLS 0xfc @@ -118,6 +120,10 @@ #define ASLE_BACKLIGHT_FAIL (2<<12) #define ASLE_PFIT_FAIL (2<<14) #define ASLE_PWM_FREQ_FAIL (2<<16) +#define ASLE_ALS_ILLUM_FAILED (1<<10) +#define ASLE_BACKLIGHT_FAILED (1<<12) +#define ASLE_PFIT_FAILED (1<<14) +#define ASLE_PWM_FREQ_FAILED (1<<16) /* ASLE backlight brightness to set */ #define ASLE_BCLP_VALID (1<<31) @@ -231,6 +237,73 @@ asle->aslc = asle_stat; } +static u32 asle_set_backlight_ironlake(struct drm_device *dev, u32 bclp) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct opregion_asle *asle = dev_priv->opregion.asle; + u32 cpu_pwm_ctl, pch_pwm_ctl2; + u32 max_backlight, level; + + if (!(bclp & ASLE_BCLP_VALID)) + return ASLE_BACKLIGHT_FAILED; + + bclp &= ASLE_BCLP_MSK; + if (bclp > 255) + return ASLE_BACKLIGHT_FAILED; + + cpu_pwm_ctl = I915_READ(BLC_PWM_CPU_CTL); + pch_pwm_ctl2 = I915_READ(BLC_PWM_PCH_CTL2); + /* get the max PWM duty cycle */ + max_backlight = (pch_pwm_ctl2 >> 16) & BACKLIGHT_DUTY_CYCLE_MASK; + /* calculate the expected PWM duty cycle */ + level = (bclp * max_backlight) / 255; + /* preserve the high 16 bits */ + cpu_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK); + /* write the updated PWM duty cycle */ + I915_WRITE(BLC_PWM_CPU_CTL, cpu_pwm_ctl | level); + + asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID; + + return 0; +} + +void ironlake_opregion_gse_intr(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct opregion_asle *asle = dev_priv->opregion.asle; + u32 asle_stat = 0; + u32 asle_req; + + if (!asle) + return; + + asle_req = asle->aslc & ASLE_REQ_MSK; + + if (!asle_req) { + DRM_DEBUG_DRIVER(I915_DRV, "non asle set request??\n"); + return; + } + + if (asle_req & ASLE_SET_ALS_ILLUM) { + DRM_DEBUG_DRIVER(I915_DRV, "Illum is not supported\n"); + asle_stat |= ASLE_ALS_ILLUM_FAILED; + } + + if (asle_req & ASLE_SET_BACKLIGHT) + asle_stat |= asle_set_backlight_ironlake(dev, asle->bclp); + + if (asle_req & ASLE_SET_PFIT) { + DRM_DEBUG_DRIVER(I915_DRV, "Pfit is not supported\n"); + asle_stat |= ASLE_PFIT_FAILED; + } + + if (asle_req & ASLE_SET_PWM_FREQ) { + DRM_DEBUG_DRIVER(I915_DRV, "PWM freq is not supported\n"); + asle_stat |= ASLE_PWM_FREQ_FAILED; + } + + asle->aslc = asle_stat; +} #define ASLE_ALS_EN (1<<0) #define ASLE_BLC_EN (1<<1) #define ASLE_PFIT_EN (1<<2) @@ -246,8 +319,7 @@ unsigned long irqflags; spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); - i915_enable_pipestat(dev_priv, 1, - I915_LEGACY_BLC_EVENT_ENABLE); + intel_enable_asle(dev); spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags); } ---
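The backlight path added above scales an ASLE brightness request (0-255) into the low 16-bit duty-cycle field while leaving the high 16 bits of the control register untouched. A minimal stand-alone sketch of that arithmetic, assuming the layout the hunk relies on (maximum duty cycle in the high 16 bits of BLC_PWM_PCH_CTL2, current duty cycle in the low 16 bits of BLC_PWM_CPU_CTL); the register values below are made up for illustration:

#include <stdint.h>
#include <stdio.h>

/* Scale a 0-255 brightness request to a hardware duty cycle,
 * preserving the high 16 bits of the control register, mirroring
 * what asle_set_backlight_ironlake() does above. */
static uint32_t scale_backlight(uint32_t cpu_pwm_ctl,
				uint32_t pch_pwm_ctl2, uint32_t bclp)
{
	uint32_t max_backlight = (pch_pwm_ctl2 >> 16) & 0xffff;
	uint32_t level = (bclp * max_backlight) / 255;

	return (cpu_pwm_ctl & ~0xffffu) | level;
}

int main(void)
{
	/* hypothetical register contents: max duty cycle 0x1000, 50% request */
	uint32_t ctl = scale_backlight(0x0a0a0000, 0x10000000, 128);

	printf("new BLC_PWM_CPU_CTL = 0x%08x\n", ctl); /* prints 0x0a0a0808 */
	return 0;
}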
linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_sdvo.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_sdvo.c @@ -114,6 +114,9 @@ /* DDC bus used by this SDVO output */ uint8_t ddc_bus; + /* Mac mini hack -- use the same DDC as the analog connector */ + struct i2c_adapter *analog_ddc_bus; + int save_sdvo_mult; u16 save_active_outputs; struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2; @@ -1478,6 +1481,36 @@ return (caps > 1); } +static struct drm_connector * +intel_find_analog_connector(struct drm_device *dev) +{ + struct drm_connector *connector; + struct intel_output *intel_output; + + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + intel_output = to_intel_output(connector); + if (intel_output->type == INTEL_OUTPUT_ANALOG) + return connector; + } + return NULL; +} + +static int +intel_analog_is_connected(struct drm_device *dev) +{ + struct drm_connector *analog_connector; + analog_connector = intel_find_analog_connector(dev); + + if (!analog_connector) + return false; + + if (analog_connector->funcs->detect(analog_connector) == + connector_status_disconnected) + return false; + + return true; +} + enum drm_connector_status intel_sdvo_hdmi_sink_detect(struct drm_connector *connector, u16 response) { @@ -1488,6 +1521,15 @@ edid = drm_get_edid(&intel_output->base, intel_output->ddc_bus); + + /* when there is no edid and no monitor is connected with VGA + * port, try to use the CRT ddc to read the EDID for DVI-connector + */ + if (edid == NULL && + sdvo_priv->analog_ddc_bus && + !intel_analog_is_connected(intel_output->base.dev)) + edid = drm_get_edid(&intel_output->base, + sdvo_priv->analog_ddc_bus); if (edid != NULL) { /* Don't report the output as connected if it's a DVI-I * connector with a non-digital EDID coming out. @@ -1540,31 +1582,32 @@ static void intel_sdvo_get_ddc_modes(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + int num_modes; /* set the bus switch and get the modes */ - intel_ddc_get_modes(intel_output); + num_modes = intel_ddc_get_modes(intel_output); -#if 0 - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - /* Mac mini hack. On this device, I get DDC through the analog, which - * load-detects as disconnected. I fail to DDC through the SDVO DDC, - * but it does load-detect as connected. So, just steal the DDC bits - * from analog when we fail at finding it the right way. + /* + * Mac mini hack. On this device, the DVI-I connector shares one DDC + * link between analog and digital outputs. So, if the regular SDVO + * DDC fails, check to see if the analog output is disconnected, in + * which case we'll look there for the digital DDC data. 
*/ - crt = xf86_config->output[0]; - intel_output = crt->driver_private; - if (intel_output->type == I830_OUTPUT_ANALOG && - crt->funcs->detect(crt) == XF86OutputStatusDisconnected) { - I830I2CInit(pScrn, &intel_output->pDDCBus, GPIOA, "CRTDDC_A"); - edid_mon = xf86OutputGetEDID(crt, intel_output->pDDCBus); - xf86DestroyI2CBusRec(intel_output->pDDCBus, true, true); - } - if (edid_mon) { - xf86OutputSetEDID(output, edid_mon); - modes = xf86OutputGetEDIDModes(output); + if (num_modes == 0 && + sdvo_priv->analog_ddc_bus && + !intel_analog_is_connected(intel_output->base.dev)) { + struct i2c_adapter *digital_ddc_bus; + + /* Switch to the analog ddc bus and try that + */ + digital_ddc_bus = intel_output->ddc_bus; + intel_output->ddc_bus = sdvo_priv->analog_ddc_bus; + + (void) intel_ddc_get_modes(intel_output); + + intel_output->ddc_bus = digital_ddc_bus; } -#endif } /** @@ -1748,6 +1791,8 @@ intel_i2c_destroy(intel_output->i2c_bus); if (intel_output->ddc_bus) intel_i2c_destroy(intel_output->ddc_bus); + if (sdvo_priv->analog_ddc_bus) + intel_i2c_destroy(sdvo_priv->analog_ddc_bus); if (sdvo_priv->sdvo_lvds_fixed_mode != NULL) drm_mode_destroy(connector->dev, @@ -2074,10 +2119,15 @@ } /* setup the DDC bus. */ - if (output_device == SDVOB) + if (output_device == SDVOB) { intel_output->ddc_bus = intel_i2c_create(dev, GPIOE, "SDVOB DDC BUS"); - else + sdvo_priv->analog_ddc_bus = intel_i2c_create(dev, GPIOA, + "SDVOB/VGA DDC BUS"); + } else { intel_output->ddc_bus = intel_i2c_create(dev, GPIOE, "SDVOC DDC BUS"); + sdvo_priv->analog_ddc_bus = intel_i2c_create(dev, GPIOA, + "SDVOC/VGA DDC BUS"); + } if (intel_output->ddc_bus == NULL) goto err_i2c; @@ -2143,6 +2193,8 @@ return true; err_i2c: + if (sdvo_priv->analog_ddc_bus != NULL) + intel_i2c_destroy(sdvo_priv->analog_ddc_bus); if (intel_output->ddc_bus != NULL) intel_i2c_destroy(intel_output->ddc_bus); if (intel_output->i2c_bus != NULL) --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_hdmi.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_hdmi.c @@ -77,14 +77,32 @@ struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv; u32 temp; - if (mode != DRM_MODE_DPMS_ON) { - temp = I915_READ(hdmi_priv->sdvox_reg); + temp = I915_READ(hdmi_priv->sdvox_reg); + + /* HW workaround, need to toggle enable bit off and on for 12bpc, but + * we do this anyway which shows more stable in testing. + */ + if (IS_IGDNG(dev)) { I915_WRITE(hdmi_priv->sdvox_reg, temp & ~SDVO_ENABLE); + POSTING_READ(hdmi_priv->sdvox_reg); + } + + if (mode != DRM_MODE_DPMS_ON) { + temp &= ~SDVO_ENABLE; } else { - temp = I915_READ(hdmi_priv->sdvox_reg); - I915_WRITE(hdmi_priv->sdvox_reg, temp | SDVO_ENABLE); + temp |= SDVO_ENABLE; } + + I915_WRITE(hdmi_priv->sdvox_reg, temp); POSTING_READ(hdmi_priv->sdvox_reg); + + /* HW workaround, need to write this twice for issue that may result + * in first write getting masked. 
+ */ + if (IS_IGDNG(dev)) { + I915_WRITE(hdmi_priv->sdvox_reg, temp); + POSTING_READ(hdmi_priv->sdvox_reg); + } } static void intel_hdmi_save(struct drm_connector *connector) @@ -223,7 +241,7 @@ connector = &intel_output->base; drm_connector_init(dev, connector, &intel_hdmi_connector_funcs, - DRM_MODE_CONNECTOR_DVID); + DRM_MODE_CONNECTOR_HDMIA); drm_connector_helper_add(connector, &intel_hdmi_connector_helper_funcs); intel_output->type = INTEL_OUTPUT_HDMI; --- linux-ec2-2.6.31.orig/drivers/gpu/drm/i915/intel_crt.c +++ linux-ec2-2.6.31/drivers/gpu/drm/i915/intel_crt.c @@ -151,13 +151,10 @@ { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 adpa, temp; + u32 adpa; bool ret; - temp = adpa = I915_READ(PCH_ADPA); - - adpa &= ~ADPA_DAC_ENABLE; - I915_WRITE(PCH_ADPA, adpa); + adpa = I915_READ(PCH_ADPA); adpa &= ~ADPA_CRT_HOTPLUG_MASK; @@ -184,8 +181,6 @@ else ret = false; - /* restore origin register */ - I915_WRITE(PCH_ADPA, temp); return ret; } @@ -239,8 +234,8 @@ } while (time_after(timeout, jiffies)); } - if ((I915_READ(PORT_HOTPLUG_STAT) & CRT_HOTPLUG_MONITOR_MASK) == - CRT_HOTPLUG_MONITOR_COLOR) + if ((I915_READ(PORT_HOTPLUG_STAT) & CRT_HOTPLUG_MONITOR_MASK) != + CRT_HOTPLUG_MONITOR_NONE) return true; return false; --- linux-ec2-2.6.31.orig/drivers/pcmcia/cardbus.c +++ linux-ec2-2.6.31/drivers/pcmcia/cardbus.c @@ -214,7 +214,7 @@ unsigned int max, pass; s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0)); -// pcibios_fixup_bus(bus); + pci_fixup_cardbus(bus); max = bus->secondary; for (pass = 0; pass < 2; pass++) --- linux-ec2-2.6.31.orig/drivers/pcmcia/bfin_cf_pcmcia.c +++ linux-ec2-2.6.31/drivers/pcmcia/bfin_cf_pcmcia.c @@ -302,7 +302,7 @@ static int bfin_cf_suspend(struct platform_device *pdev, pm_message_t mesg) { - return pcmcia_socket_dev_suspend(&pdev->dev, mesg); + return pcmcia_socket_dev_suspend(&pdev->dev); } static int bfin_cf_resume(struct platform_device *pdev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/pd6729.c +++ linux-ec2-2.6.31/drivers/pcmcia/pd6729.c @@ -758,7 +758,7 @@ #ifdef CONFIG_PM static int pd6729_socket_suspend(struct pci_dev *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int pd6729_socket_resume(struct pci_dev *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/cs.c +++ linux-ec2-2.6.31/drivers/pcmcia/cs.c @@ -98,10 +98,13 @@ * These functions check for the appropriate struct pcmcia_soket arrays, * and pass them to the low-level functions pcmcia_{suspend,resume}_socket */ +static int socket_early_resume(struct pcmcia_socket *skt); +static int socket_late_resume(struct pcmcia_socket *skt); static int socket_resume(struct pcmcia_socket *skt); static int socket_suspend(struct pcmcia_socket *skt); -int pcmcia_socket_dev_suspend(struct device *dev, pm_message_t state) +static void pcmcia_socket_dev_run(struct device *dev, + int (*cb)(struct pcmcia_socket *)) { struct pcmcia_socket *socket; @@ -110,29 +113,34 @@ if (socket->dev.parent != dev) continue; mutex_lock(&socket->skt_mutex); - socket_suspend(socket); + cb(socket); mutex_unlock(&socket->skt_mutex); } up_read(&pcmcia_socket_list_rwsem); +} +int pcmcia_socket_dev_suspend(struct device *dev) +{ + pcmcia_socket_dev_run(dev, socket_suspend); return 0; } EXPORT_SYMBOL(pcmcia_socket_dev_suspend); -int pcmcia_socket_dev_resume(struct device *dev) +void pcmcia_socket_dev_early_resume(struct device *dev) { - struct pcmcia_socket *socket; + pcmcia_socket_dev_run(dev, socket_early_resume); 
+} +EXPORT_SYMBOL(pcmcia_socket_dev_early_resume); - down_read(&pcmcia_socket_list_rwsem); - list_for_each_entry(socket, &pcmcia_socket_list, socket_list) { - if (socket->dev.parent != dev) - continue; - mutex_lock(&socket->skt_mutex); - socket_resume(socket); - mutex_unlock(&socket->skt_mutex); - } - up_read(&pcmcia_socket_list_rwsem); +void pcmcia_socket_dev_late_resume(struct device *dev) +{ + pcmcia_socket_dev_run(dev, socket_late_resume); +} +EXPORT_SYMBOL(pcmcia_socket_dev_late_resume); +int pcmcia_socket_dev_resume(struct device *dev) +{ + pcmcia_socket_dev_run(dev, socket_resume); return 0; } EXPORT_SYMBOL(pcmcia_socket_dev_resume); @@ -546,29 +554,24 @@ return 0; } -/* - * Resume a socket. If a card is present, verify its CIS against - * our cached copy. If they are different, the card has been - * replaced, and we need to tell the drivers. - */ -static int socket_resume(struct pcmcia_socket *skt) +static int socket_early_resume(struct pcmcia_socket *skt) { - int ret; - - if (!(skt->state & SOCKET_SUSPEND)) - return -EBUSY; - skt->socket = dead_socket; skt->ops->init(skt); skt->ops->set_socket(skt, &skt->socket); + if (skt->state & SOCKET_PRESENT) + skt->resume_status = socket_setup(skt, resume_delay); + return 0; +} +static int socket_late_resume(struct pcmcia_socket *skt) +{ if (!(skt->state & SOCKET_PRESENT)) { skt->state &= ~SOCKET_SUSPEND; return socket_insert(skt); } - ret = socket_setup(skt, resume_delay); - if (ret == 0) { + if (skt->resume_status == 0) { /* * FIXME: need a better check here for cardbus cards. */ @@ -596,6 +599,20 @@ return 0; } +/* + * Resume a socket. If a card is present, verify its CIS against + * our cached copy. If they are different, the card has been + * replaced, and we need to tell the drivers. + */ +static int socket_resume(struct pcmcia_socket *skt) +{ + if (!(skt->state & SOCKET_SUSPEND)) + return -EBUSY; + + socket_early_resume(skt); + return socket_late_resume(skt); +} + static void socket_remove(struct pcmcia_socket *skt) { dev_printk(KERN_NOTICE, &skt->dev, --- linux-ec2-2.6.31.orig/drivers/pcmcia/omap_cf.c +++ linux-ec2-2.6.31/drivers/pcmcia/omap_cf.c @@ -334,7 +334,7 @@ static int omap_cf_suspend(struct platform_device *pdev, pm_message_t mesg) { - return pcmcia_socket_dev_suspend(&pdev->dev, mesg); + return pcmcia_socket_dev_suspend(&pdev->dev); } static int omap_cf_resume(struct platform_device *pdev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/tcic.c +++ linux-ec2-2.6.31/drivers/pcmcia/tcic.c @@ -366,7 +366,7 @@ static int tcic_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int tcic_drv_pcmcia_resume(struct platform_device *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/at91_cf.c +++ linux-ec2-2.6.31/drivers/pcmcia/at91_cf.c @@ -363,7 +363,7 @@ struct at91_cf_socket *cf = platform_get_drvdata(pdev); struct at91_cf_data *board = cf->board; - pcmcia_socket_dev_suspend(&pdev->dev, mesg); + pcmcia_socket_dev_suspend(&pdev->dev); if (device_may_wakeup(&pdev->dev)) { enable_irq_wake(board->det_pin); if (board->irq_pin) --- linux-ec2-2.6.31.orig/drivers/pcmcia/m32r_cfc.c +++ linux-ec2-2.6.31/drivers/pcmcia/m32r_cfc.c @@ -699,7 +699,7 @@ static int cfc_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int cfc_drv_pcmcia_resume(struct platform_device *dev) --- 
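The cs.c refactoring above splits socket_resume() into an early phase (re-initialize the bridge while device interrupts are still off) and a late phase (re-validate the CIS and finish insertion), with one walker dispatching either phase over every socket owned by a device. A toy model of that dispatch pattern, using simplified stand-in types; the real code walks pcmcia_socket_list under a rwsem and takes skt_mutex per socket:

#include <stdio.h>

struct pc_socket { int id; int suspended; };

static struct pc_socket sockets[] = { { 0, 1 }, { 1, 1 } };

/* apply one resume phase to every socket belonging to the device */
static void dev_run(int (*phase)(struct pc_socket *))
{
	for (unsigned int i = 0; i < sizeof(sockets) / sizeof(sockets[0]); i++)
		phase(&sockets[i]);
}

static int early_resume(struct pc_socket *s)
{
	printf("socket %d: reinit hardware (noirq phase)\n", s->id);
	return 0;
}

static int late_resume(struct pc_socket *s)
{
	s->suspended = 0;
	printf("socket %d: verify CIS, finish insert\n", s->id);
	return 0;
}

int main(void)
{
	dev_run(early_resume); /* from the bridge's ->resume_noirq */
	dev_run(late_resume);  /* from the bridge's ->resume */
	return 0;
}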
linux-ec2-2.6.31.orig/drivers/pcmcia/m8xx_pcmcia.c +++ linux-ec2-2.6.31/drivers/pcmcia/m8xx_pcmcia.c @@ -1296,7 +1296,7 @@ #ifdef CONFIG_PM static int m8xx_suspend(struct platform_device *pdev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&pdev->dev, state); + return pcmcia_socket_dev_suspend(&pdev->dev); } static int m8xx_resume(struct platform_device *pdev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/m32r_pcc.c +++ linux-ec2-2.6.31/drivers/pcmcia/m32r_pcc.c @@ -675,7 +675,7 @@ static int pcc_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int pcc_drv_pcmcia_resume(struct platform_device *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/pxa2xx_base.c +++ linux-ec2-2.6.31/drivers/pcmcia/pxa2xx_base.c @@ -302,7 +302,7 @@ static int pxa2xx_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int pxa2xx_drv_pcmcia_resume(struct platform_device *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/au1000_generic.c +++ linux-ec2-2.6.31/drivers/pcmcia/au1000_generic.c @@ -515,7 +515,7 @@ static int au1x00_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int au1x00_drv_pcmcia_resume(struct platform_device *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/yenta_socket.c +++ linux-ec2-2.6.31/drivers/pcmcia/yenta_socket.c @@ -1225,60 +1225,81 @@ } #ifdef CONFIG_PM -static int yenta_dev_suspend (struct pci_dev *dev, pm_message_t state) +static int yenta_dev_suspend_noirq(struct device *dev) { - struct yenta_socket *socket = pci_get_drvdata(dev); + struct pci_dev *pdev = to_pci_dev(dev); + struct yenta_socket *socket = pci_get_drvdata(pdev); int ret; - ret = pcmcia_socket_dev_suspend(&dev->dev, state); + ret = pcmcia_socket_dev_suspend(dev); - if (socket) { - if (socket->type && socket->type->save_state) - socket->type->save_state(socket); - - /* FIXME: pci_save_state needs to have a better interface */ - pci_save_state(dev); - pci_read_config_dword(dev, 16*4, &socket->saved_state[0]); - pci_read_config_dword(dev, 17*4, &socket->saved_state[1]); - pci_disable_device(dev); - - /* - * Some laptops (IBM T22) do not like us putting the Cardbus - * bridge into D3. At a guess, some other laptop will - * probably require this, so leave it commented out for now. - */ - /* pci_set_power_state(dev, 3); */ - } + if (!socket) + return ret; + + if (socket->type && socket->type->save_state) + socket->type->save_state(socket); + + pci_save_state(pdev); + pci_read_config_dword(pdev, 16*4, &socket->saved_state[0]); + pci_read_config_dword(pdev, 17*4, &socket->saved_state[1]); + pci_disable_device(pdev); + + /* + * Some laptops (IBM T22) do not like us putting the Cardbus + * bridge into D3. At a guess, some other laptop will + * probably require this, so leave it commented out for now. 
+ */ + /* pci_set_power_state(dev, 3); */ return ret; } - -static int yenta_dev_resume (struct pci_dev *dev) +static int yenta_dev_resume_noirq(struct device *dev) { - struct yenta_socket *socket = pci_get_drvdata(dev); + struct pci_dev *pdev = to_pci_dev(dev); + struct yenta_socket *socket = pci_get_drvdata(pdev); + int ret; - if (socket) { - int rc; + if (!socket) + return 0; - pci_set_power_state(dev, 0); - /* FIXME: pci_restore_state needs to have a better interface */ - pci_restore_state(dev); - pci_write_config_dword(dev, 16*4, socket->saved_state[0]); - pci_write_config_dword(dev, 17*4, socket->saved_state[1]); + pci_write_config_dword(pdev, 16*4, socket->saved_state[0]); + pci_write_config_dword(pdev, 17*4, socket->saved_state[1]); - rc = pci_enable_device(dev); - if (rc) - return rc; + ret = pci_enable_device(pdev); + if (ret) + return ret; - pci_set_master(dev); + pci_set_master(pdev); - if (socket->type && socket->type->restore_state) - socket->type->restore_state(socket); - } + if (socket->type && socket->type->restore_state) + socket->type->restore_state(socket); - return pcmcia_socket_dev_resume(&dev->dev); + pcmcia_socket_dev_early_resume(dev); + return 0; } + +static int yenta_dev_resume(struct device *dev) +{ + pcmcia_socket_dev_late_resume(dev); + return 0; +} + +static struct dev_pm_ops yenta_pm_ops = { + .suspend_noirq = yenta_dev_suspend_noirq, + .resume_noirq = yenta_dev_resume_noirq, + .resume = yenta_dev_resume, + .freeze_noirq = yenta_dev_suspend_noirq, + .thaw_noirq = yenta_dev_resume_noirq, + .thaw = yenta_dev_resume, + .poweroff_noirq = yenta_dev_suspend_noirq, + .restore_noirq = yenta_dev_resume_noirq, + .restore = yenta_dev_resume, +}; + +#define YENTA_PM_OPS (&yenta_pm_ops) +#else +#define YENTA_PM_OPS NULL #endif #define CB_ID(vend,dev,type) \ @@ -1376,10 +1397,7 @@ .id_table = yenta_table, .probe = yenta_probe, .remove = __devexit_p(yenta_close), -#ifdef CONFIG_PM - .suspend = yenta_dev_suspend, - .resume = yenta_dev_resume, -#endif + .driver.pm = YENTA_PM_OPS, }; --- linux-ec2-2.6.31.orig/drivers/pcmcia/i82092.c +++ linux-ec2-2.6.31/drivers/pcmcia/i82092.c @@ -42,7 +42,7 @@ #ifdef CONFIG_PM static int i82092aa_socket_suspend (struct pci_dev *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int i82092aa_socket_resume (struct pci_dev *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/sa1111_generic.c +++ linux-ec2-2.6.31/drivers/pcmcia/sa1111_generic.c @@ -159,7 +159,7 @@ static int pcmcia_suspend(struct sa1111_dev *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int pcmcia_resume(struct sa1111_dev *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/sa1100_generic.c +++ linux-ec2-2.6.31/drivers/pcmcia/sa1100_generic.c @@ -89,7 +89,7 @@ static int sa11x0_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int sa11x0_drv_pcmcia_resume(struct platform_device *dev) --- linux-ec2-2.6.31.orig/drivers/pcmcia/i82365.c +++ linux-ec2-2.6.31/drivers/pcmcia/i82365.c @@ -1241,7 +1241,7 @@ static int i82365_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int i82365_drv_pcmcia_resume(struct platform_device *dev) ---
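The yenta_socket hunk above is the consumer of that two-phase split: the legacy pci_driver .suspend/.resume pair becomes a dev_pm_ops table, so the config-space save/restore runs in the noirq phases and only the CIS re-check runs once interrupts are back on. A minimal sketch of the shape of such a table, using a stand-in ops struct (the kernel's struct dev_pm_ops has many more callbacks than shown):

#include <stdio.h>

struct dev; /* opaque stand-in for struct device */

/* stand-in for the few dev_pm_ops callbacks the hunk fills in */
struct pm_ops {
	int (*suspend_noirq)(struct dev *d);
	int (*resume_noirq)(struct dev *d);
	int (*resume)(struct dev *d);
};

static int bridge_suspend_noirq(struct dev *d)
{
	printf("save bridge state, device IRQs disabled\n");
	return 0;
}

static int bridge_resume_noirq(struct dev *d)
{
	printf("restore bridge state, kick early socket resume\n");
	return 0;
}

static int bridge_resume(struct dev *d)
{
	printf("late socket resume, IRQs back on\n");
	return 0;
}

static const struct pm_ops bridge_pm_ops = {
	.suspend_noirq = bridge_suspend_noirq,
	.resume_noirq = bridge_resume_noirq,
	.resume = bridge_resume,
};

int main(void)
{
	/* the PM core invokes these around the interrupts-off window */
	bridge_pm_ops.suspend_noirq(NULL);
	bridge_pm_ops.resume_noirq(NULL);
	bridge_pm_ops.resume(NULL);
	return 0;
}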
linux-ec2-2.6.31.orig/drivers/pcmcia/vrc4171_card.c +++ linux-ec2-2.6.31/drivers/pcmcia/vrc4171_card.c @@ -707,7 +707,7 @@ static int vrc4171_card_suspend(struct platform_device *dev, pm_message_t state) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(&dev->dev); } static int vrc4171_card_resume(struct platform_device *dev) --- linux-ec2-2.6.31.orig/drivers/xen/sys-hypervisor.c +++ linux-ec2-2.6.31/drivers/xen/sys-hypervisor.c @@ -18,6 +18,8 @@ #include #include +#include "xenbus/xenbus_comms.h" + #define HYPERVISOR_ATTR_RO(_name) \ static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) @@ -116,9 +118,8 @@ { char *vm, *val; int ret; - extern int xenstored_ready; - if (!xenstored_ready) + if (!is_xenstored_ready()) return -EBUSY; vm = xenbus_read(XBT_NIL, "vm", "", NULL); --- linux-ec2-2.6.31.orig/drivers/xen/util.c +++ linux-ec2-2.6.31/drivers/xen/util.c @@ -0,0 +1,20 @@ +#include +#include +#include + +struct class *get_xen_class(void) +{ + static struct class *xen_class; + + if (xen_class) + return xen_class; + + xen_class = class_create(THIS_MODULE, "xen"); + if (IS_ERR(xen_class)) { + printk(KERN_ERR "Failed to create xen sysfs class.\n"); + xen_class = NULL; + } + + return xen_class; +} +EXPORT_SYMBOL_GPL(get_xen_class); --- linux-ec2-2.6.31.orig/drivers/xen/Kconfig +++ linux-ec2-2.6.31/drivers/xen/Kconfig @@ -1,6 +1,390 @@ +# +# This Kconfig describes Xen options +# + +config XEN + bool + select IRQ_PER_CPU if SMP + +if XEN +config XEN_INTERFACE_VERSION + hex + default 0x00030207 + +menu "XEN" + +config XEN_PRIVILEGED_GUEST + bool "Privileged Guest (domain 0)" + help + Support for privileged operation (domain 0) + +config XEN_UNPRIVILEGED_GUEST + def_bool !XEN_PRIVILEGED_GUEST + select PM + select PM_SLEEP + select PM_SLEEP_SMP if SMP + +config XEN_PRIVCMD + def_bool y + depends on PROC_FS + +config XEN_XENBUS_DEV + def_bool y + depends on PROC_FS + +config XEN_NETDEV_ACCEL_SFC_UTIL + depends on X86 + tristate + +config XEN_BACKEND + tristate "Backend driver support" + default XEN_PRIVILEGED_GUEST + help + Support for backend device drivers that provide I/O services + to other virtual machines. + +config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND + default XEN_BACKEND + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. + +config XEN_BLKDEV_TAP + tristate "Block-device tap backend driver" + depends on XEN_BACKEND + default XEN_BACKEND + help + The block tap driver is an alternative to the block back driver + and allows VM block requests to be redirected to userspace through + a device interface. The tap allows user-space development of + high-performance block backends, where disk images may be implemented + as files, in memory, or on other hosts across the network. This + driver can safely coexist with the existing blockback driver. + +config XEN_BLKDEV_TAP2 + tristate "Block-device tap backend driver 2" + depends on XEN_BACKEND + default XEN_BACKEND + help + The block tap driver is an alternative to the block back driver + and allows VM block requests to be redirected to userspace through + a device interface. The tap allows user-space development of + high-performance block backends, where disk images may be implemented + as files, in memory, or on other hosts across the network. This + driver can safely coexist with the existing blockback driver.
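The "high-performance shared-memory interface" these backend options refer to is a single-producer/single-consumer ring with free-running prod/cons counters, the same scheme the xenfb/xenkbd frontends later in this patch use (prod - cons == RING_LEN means full). A toy user-space sketch of that protocol, with GCC's __sync_synchronize() standing in for the kernel's wmb()/rmb():

#include <stdint.h>
#include <stdio.h>

#define RING_LEN 8 /* power of 2, so the counters may wrap freely */

struct ring {
	volatile uint32_t prod, cons; /* free-running counters */
	int slot[RING_LEN];
};

static int ring_put(struct ring *r, int req)
{
	if (r->prod - r->cons == RING_LEN)
		return -1; /* full: consumer is RING_LEN entries behind */
	r->slot[r->prod % RING_LEN] = req;
	__sync_synchronize(); /* make the slot visible before prod moves */
	r->prod++;
	return 0;
}

static int ring_get(struct ring *r, int *req)
{
	if (r->cons == r->prod)
		return -1; /* empty */
	__sync_synchronize(); /* see ring contents up to prod */
	*req = r->slot[r->cons % RING_LEN];
	r->cons++;
	return 0;
}

int main(void)
{
	struct ring r = { 0, 0, { 0 } };
	int v;

	ring_put(&r, 42);
	if (ring_get(&r, &v) == 0)
		printf("dequeued request %d\n", v);
	return 0;
}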
+ +config XEN_BLKBACK_PAGEMAP + tristate + depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP2 != n + default XEN_BLKDEV_BACKEND || XEN_BLKDEV_TAP2 + +config XEN_NETDEV_BACKEND + tristate "Network-device backend driver" + depends on XEN_BACKEND && NET + default XEN_BACKEND + help + The network-device backend driver allows the kernel to export its + network devices to other guests via a high-performance shared-memory + interface. + +config XEN_NETDEV_TX_SHIFT + int "Maximum simultaneous transmit requests (as a power of 2)" + depends on XEN_NETDEV_BACKEND + range 5 16 + default 8 + help + The maximum number of transmits the driver can hold pending, expressed + as the exponent of a power of 2. + +config XEN_NETDEV_PIPELINED_TRANSMITTER + bool "Pipelined transmitter (DANGEROUS)" + depends on XEN_NETDEV_BACKEND + help + If the net backend is a dumb domain, such as a transparent Ethernet + bridge with no local IP interface, it is safe to say Y here to get + slightly lower network overhead. + If the backend has a local IP interface; or may be doing smart things + like reassembling packets to perform firewall filtering; or if you + are unsure; or if you experience network hangs when this option is + enabled; then you must say N here. + +config XEN_NETDEV_ACCEL_SFC_BACKEND + tristate "Network-device backend driver acceleration for Solarflare NICs" + depends on XEN_NETDEV_BACKEND && SFC && SFC_RESOURCE && X86 + select XEN_NETDEV_ACCEL_SFC_UTIL + default m + +config XEN_NETDEV_LOOPBACK + tristate "Network-device loopback driver" + depends on XEN_NETDEV_BACKEND + help + A two-interface loopback device to emulate a local netfront-netback + connection. If unsure, it is probably safe to say N here. + +config XEN_PCIDEV_BACKEND + tristate "PCI-device backend driver" + depends on PCI && XEN_PRIVILEGED_GUEST && XEN_BACKEND + default XEN_BACKEND + help + The PCI device backend driver allows the kernel to export arbitrary + PCI devices to other guests. If you select this to be a module, you + will need to make sure no other driver has bound to the device(s) + you want to make visible to other guests. + +choice + prompt "PCI Backend Mode" + depends on XEN_PCIDEV_BACKEND + default XEN_PCIDEV_BACKEND_CONTROLLER if IA64 + default XEN_PCIDEV_BACKEND_VPCI + +config XEN_PCIDEV_BACKEND_VPCI + bool "Virtual PCI" + ---help--- + This PCI Backend hides the true PCI topology and makes the frontend + think there is a single PCI bus with only the exported devices on it. + For example, a device at 03:05.0 will be re-assigned to 00:00.0. A + second device at 02:1a.1 will be re-assigned to 00:01.1. + +config XEN_PCIDEV_BACKEND_PASS + bool "Passthrough" + ---help--- + This PCI Backend provides a real view of the PCI topology to the + frontend (for example, a device at 06:01.b will still appear at + 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed + PCI devices to its driver domains. This may be required for drivers + which depend on finding their hardware in certain bus/slot + locations. + +config XEN_PCIDEV_BACKEND_SLOT + bool "Slot" + ---help--- + This PCI Backend hides the true PCI topology and makes the frontend + think there is a single PCI bus with only the exported devices on it. + Contrary to the virtual PCI backend, a function becomes a new slot. + For example, a device at 03:05.2 will be re-assigned to 00:00.0. A + second device at 02:1a.1 will be re-assigned to 00:01.0.
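XEN_NETDEV_TX_SHIFT above stores the pending-transmit limit as an exponent, which lets a backend size its pending ring to a power of 2 and use masking instead of modulo arithmetic; whether netback does exactly this is an assumption here. A short sketch of how such an exponent is typically consumed; the macro names are illustrative, not the driver's actual symbols:

#include <stdio.h>

#define TX_SHIFT 8 /* e.g. the value of CONFIG_XEN_NETDEV_TX_SHIFT */
#define MAX_PENDING_REQS (1UL << TX_SHIFT) /* 256 in-flight transmits */
#define PENDING_MASK (MAX_PENDING_REQS - 1) /* valid because size is 2^n */

int main(void)
{
	/* free-running producer/consumer counters, as in the ring sketch above */
	unsigned long prod = 300, cons = 50;

	printf("in flight: %lu of %lu, next slot index: %lu\n",
	       prod - cons, MAX_PENDING_REQS, prod & PENDING_MASK);
	return 0;
}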
+ +config XEN_PCIDEV_BACKEND_CONTROLLER + bool "Controller" + depends on IA64 + ---help--- + This PCI backend virtualizes the PCI bus topology by providing a + virtual bus per PCI root device. Devices which are physically under + the same root bus will appear on the same virtual bus. For systems + with complex I/O addressing, this is the only backend which supports + extended I/O port spaces and MMIO translation offsets. This backend + also supports slot virtualization. For example, a device at + 0000:01:02.1 will be re-assigned to 0000:00:00.0. A second device + at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be + re-assigned to 0000:00:01.0. A third device at 0000:16:05.0 (under + a different PCI root bus) will be re-assigned to 0000:01:00.0. + +endchoice + +config XEN_PCIDEV_BE_DEBUG + bool "PCI Backend Debugging" + depends on XEN_PCIDEV_BACKEND + +config XEN_TPMDEV_BACKEND + tristate "TPM-device backend driver" + depends on XEN_BACKEND + help + The TPM-device backend driver + +config XEN_SCSI_BACKEND + tristate "SCSI backend driver" + depends on SCSI && XEN_BACKEND + default m + help + The SCSI backend driver allows the kernel to export its SCSI Devices + to other guests via a high-performance shared-memory interface. + +config XEN_USB_BACKEND + tristate "USB backend driver" + depends on USB && XEN_BACKEND + default m + help + The USB backend driver allows the kernel to export its USB Devices + to other guests. + +config XEN_BLKDEV_FRONTEND + tristate "Block-device frontend driver" + default y + help + The block-device frontend driver allows the kernel to access block + devices mounted within another guest OS. Unless you are building a + dedicated device-driver domain, or your master control domain + (domain 0), you almost certainly want to say Y here. + +config XEN_NETDEV_FRONTEND + tristate "Network-device frontend driver" + depends on NET + default y + help + The network-device frontend driver allows the kernel to access + network interfaces within another guest OS. Unless you are building a + dedicated device-driver domain, or your master control domain + (domain 0), you almost certainly want to say Y here. + +config XEN_NETDEV_ACCEL_SFC_FRONTEND + tristate "Network-device frontend driver acceleration for Solarflare NICs" + depends on XEN_NETDEV_FRONTEND && X86 + select XEN_NETDEV_ACCEL_SFC_UTIL + default m + +config XEN_SCSI_FRONTEND + tristate "SCSI frontend driver" + depends on SCSI + default m + help + The SCSI frontend driver allows the kernel to access SCSI Devices + within another guest OS. + +config XEN_USB_FRONTEND + tristate "USB frontend driver" + depends on USB + default m + help + The USB frontend driver allows the kernel to access USB Devices + within another guest OS. + +config XEN_USB_FRONTEND_HCD_STATS + bool "Taking the HCD statistics (for debug)" + depends on XEN_USB_FRONTEND + default y + help + Count the transferred urb status and the RING_FULL occurrence. + +config XEN_GRANT_DEV + tristate "User-space granted page access driver" + default XEN_PRIVILEGED_GUEST + help + Device for accessing (in user-space) pages that have been granted + by other domains. + +config XEN_FRAMEBUFFER + tristate "Framebuffer-device frontend driver" + depends on FB + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + default y + help + The framebuffer-device frontend driver allows the kernel to create a + virtual framebuffer. This framebuffer can be viewed in another + domain.
Unless this domain has access to a real video card, you + probably want to say Y here. + +config XEN_KEYBOARD + tristate "Keyboard-device frontend driver" + depends on XEN_FRAMEBUFFER && INPUT + default y + help + The keyboard-device frontend driver allows the kernel to create a + virtual keyboard. This keyboard can then be driven by another + domain. If you've said Y to CONFIG_XEN_FRAMEBUFFER, you probably + want to say Y here. + +config XEN_DISABLE_SERIAL + bool "Disable serial port drivers" + default y + help + Disable serial port drivers, allowing the Xen console driver + to provide a serial console at ttyS0. + +config XEN_SYSFS + tristate "Export Xen attributes in sysfs" + depends on SYSFS + select SYS_HYPERVISOR + default y + help + Xen hypervisor attributes will show up under /sys/hypervisor/. + +config XEN_NR_GUEST_DEVICES + int "Number of guest devices" + range 0 4032 if 64BIT + range 0 960 + default 256 if XEN_BACKEND + default 16 + help + Specify the total number of virtual devices (i.e. both frontend + and backend) that you want the kernel to be able to service. + +choice + prompt "Xen version compatibility" + default XEN_COMPAT_030002_AND_LATER + + config XEN_COMPAT_030002_AND_LATER + bool "3.0.2 and later" + + config XEN_COMPAT_030004_AND_LATER + bool "3.0.4 and later" + + config XEN_COMPAT_030100_AND_LATER + bool "3.1.0 and later" + + config XEN_COMPAT_030200_AND_LATER + bool "3.2.0 and later" + + config XEN_COMPAT_030300_AND_LATER + bool "3.3.0 and later" + + config XEN_COMPAT_LATEST_ONLY + bool "no compatibility code" + +endchoice + +config XEN_COMPAT + hex + default 0xffffff if XEN_COMPAT_LATEST_ONLY + default 0x030300 if XEN_COMPAT_030300_AND_LATER + default 0x030200 if XEN_COMPAT_030200_AND_LATER + default 0x030100 if XEN_COMPAT_030100_AND_LATER + default 0x030004 if XEN_COMPAT_030004_AND_LATER + default 0x030002 if XEN_COMPAT_030002_AND_LATER + default 0 + +config XEN_VCPU_INFO_PLACEMENT + bool "Place shared vCPU info in per-CPU storage" +# depends on X86 && (XEN_COMPAT >= 0x00030101) + depends on X86 + depends on !XEN_COMPAT_030002_AND_LATER + depends on !XEN_COMPAT_030004_AND_LATER + depends on !XEN_COMPAT_030100_AND_LATER + default SMP + ---help--- + This allows faster access to the per-vCPU shared info + structure. + +endmenu + +config HAVE_IRQ_IGNORE_UNHANDLED + def_bool y + +config IRQ_PER_CPU + bool + +config NO_IDLE_HZ + def_bool y + +config XEN_SMPBOOT + def_bool y + depends on SMP && !PPC_XEN + +config XEN_XENCOMM + bool + +config XEN_DEVMEM + def_bool y + +endif + config XEN_BALLOON - bool "Xen memory balloon driver" - depends on XEN + bool "Xen memory balloon driver" if PARAVIRT_XEN + depends on (XEN && !PPC_XEN) || PARAVIRT_XEN default y help The balloon driver allows the Xen domain to request more memory from @@ -8,19 +392,21 @@ return unneeded memory to the system. config XEN_SCRUB_PAGES - bool "Scrub pages before returning them to system" - depends on XEN_BALLOON + bool "Scrub memory before freeing it to Xen" + depends on XEN || XEN_BALLOON default y help - Scrub pages before returning them to the system for reuse by - other domains. This makes sure that any confidential data - is not accidentally visible to other domains. Is it more - secure, but slightly less efficient. + Erase memory contents before freeing it back to Xen's global + pool. This ensures that any secrets contained within that + memory (e.g., private keys) cannot be found by other guests that + may be running on the machine. Most people will want to say Y here. 
+ If security is not a concern then you may increase performance by + saying N. If in doubt, say yes. config XEN_DEV_EVTCHN tristate "Xen /dev/xen/evtchn device" - depends on XEN + depends on PARAVIRT_XEN default y help The evtchn driver allows a userspace process to triger event @@ -30,7 +416,7 @@ config XENFS tristate "Xen filesystem" - depends on XEN + depends on PARAVIRT_XEN default y help The xen filesystem provides a way for domains to share @@ -53,7 +439,7 @@ config XEN_SYS_HYPERVISOR bool "Create xen entries under /sys/hypervisor" - depends on XEN && SYSFS + depends on PARAVIRT_XEN && SYSFS select SYS_HYPERVISOR default y help --- linux-ec2-2.6.31.orig/drivers/xen/Makefile +++ linux-ec2-2.6.31/drivers/xen/Makefile @@ -1,9 +1,42 @@ -obj-y += grant-table.o features.o events.o manage.o -obj-y += xenbus/ +obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o +xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o +xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o +xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o -obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -obj-$(CONFIG_XEN_XENCOMM) += xencomm.o -obj-$(CONFIG_XEN_BALLOON) += balloon.o +nostackp := $(call cc-option, -fno-stack-protector) +CFLAGS_features.o := $(nostackp) + +xen-balloon-$(CONFIG_XEN) := balloon/ +obj-$(CONFIG_XEN) += core/ +obj-$(CONFIG_XEN) += console/ +obj-$(CONFIG_XEN) += evtchn/ +obj-y += xenbus/ +obj-$(CONFIG_XEN) += char/ + +obj-$(CONFIG_XEN) += util.o +obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y) +obj-$(CONFIG_XEN_XENCOMM) += $(xen-xencomm-y) +obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y) obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o obj-$(CONFIG_XENFS) += xenfs/ -obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file +obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ +obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ +obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ +obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/ +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ +obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront/ +obj-$(CONFIG_XEN_FRAMEBUFFER) += fbfront/ +obj-$(CONFIG_XEN_KEYBOARD) += fbfront/ +obj-$(CONFIG_XEN_SCSI_BACKEND) += scsiback/ +obj-$(CONFIG_XEN_SCSI_FRONTEND) += scsifront/ +obj-$(CONFIG_XEN_USB_BACKEND) += usbback/ +obj-$(CONFIG_XEN_USB_FRONTEND) += usbfront/ +obj-$(CONFIG_XEN_PRIVCMD) += privcmd/ +obj-$(CONFIG_XEN_GRANT_DEV) += gntdev/ +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) += sfc_netutil/ +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) += sfc_netfront/ +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) += sfc_netback/ --- linux-ec2-2.6.31.orig/drivers/xen/fbfront/xenkbd.c +++ linux-ec2-2.6.31/drivers/xen/fbfront/xenkbd.c @@ -0,0 +1,354 @@ +/* + * linux/drivers/input/keyboard/xenkbd.c -- Xen para-virtual input device + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + * + * Based on linux/drivers/input/mouse/sermouse.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +/* + * TODO: + * + * Switch to grant tables together with xenfb.c. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct xenkbd_info +{ + struct input_dev *kbd; + struct input_dev *ptr; + struct xenkbd_page *page; + int irq; + struct xenbus_device *xbdev; + char phys[32]; +}; + +static int xenkbd_remove(struct xenbus_device *); +static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *); +static void xenkbd_disconnect_backend(struct xenkbd_info *); + +/* + * Note: if you need to send out events, see xenfb_do_update() for how + * to do that. + */ + +static irqreturn_t input_handler(int rq, void *dev_id) +{ + struct xenkbd_info *info = dev_id; + struct xenkbd_page *page = info->page; + __u32 cons, prod; + + prod = page->in_prod; + if (prod == page->in_cons) + return IRQ_HANDLED; + rmb(); /* ensure we see ring contents up to prod */ + for (cons = page->in_cons; cons != prod; cons++) { + union xenkbd_in_event *event; + struct input_dev *dev; + event = &XENKBD_IN_RING_REF(page, cons); + + dev = info->ptr; + switch (event->type) { + case XENKBD_TYPE_MOTION: + if (event->motion.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->motion.rel_z); + input_report_rel(dev, REL_X, event->motion.rel_x); + input_report_rel(dev, REL_Y, event->motion.rel_y); + break; + case XENKBD_TYPE_KEY: + dev = NULL; + if (test_bit(event->key.keycode, info->kbd->keybit)) + dev = info->kbd; + if (test_bit(event->key.keycode, info->ptr->keybit)) + dev = info->ptr; + if (dev) + input_report_key(dev, event->key.keycode, + event->key.pressed); + else + printk("xenkbd: unhandled keycode 0x%x\n", + event->key.keycode); + break; + case XENKBD_TYPE_POS: + if (event->pos.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->pos.rel_z); + input_report_abs(dev, ABS_X, event->pos.abs_x); + input_report_abs(dev, ABS_Y, event->pos.abs_y); + break; + } + if (dev) + input_sync(dev); + } + mb(); /* ensure we got ring contents */ + page->in_cons = cons; + notify_remote_via_irq(info->irq); + + return IRQ_HANDLED; +} + +int __devinit xenkbd_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int ret, i; + struct xenkbd_info *info; + struct input_dev *kbd, *ptr; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + dev_set_drvdata(&dev->dev, info); + info->xbdev = dev; + snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename); + + info->page = (void *)__get_free_page(GFP_KERNEL); + if (!info->page) + goto error_nomem; + info->page->in_cons = info->page->in_prod = 0; + info->page->out_cons = info->page->out_prod = 0; + + /* keyboard */ + kbd = input_allocate_device(); + if (!kbd) + goto error_nomem; + kbd->name = "Xen Virtual Keyboard"; + kbd->phys = info->phys; + kbd->id.bustype = BUS_PCI; + kbd->id.vendor = 0x5853; + kbd->id.product = 0xffff; + kbd->evbit[0] = BIT(EV_KEY); + for (i = KEY_ESC; i < KEY_UNKNOWN; i++) + set_bit(i, kbd->keybit); + for (i = KEY_OK; i < KEY_MAX; i++) + set_bit(i, kbd->keybit); + + ret = input_register_device(kbd); + if (ret) { + input_free_device(kbd); + xenbus_dev_fatal(dev, ret, "input_register_device(kbd)"); + goto error; + } + info->kbd = kbd; + + /* pointing device */ + ptr = input_allocate_device(); + if (!ptr) + goto error_nomem; + ptr->name = "Xen Virtual Pointer"; + ptr->phys = info->phys; + ptr->id.bustype = BUS_PCI; + ptr->id.vendor = 0x5853; + ptr->id.product = 0xfffe; + ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); + for (i = BTN_LEFT; i <= BTN_TASK; i++) + 
set_bit(i, ptr->keybit); + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL); + input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); + input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); + + ret = input_register_device(ptr); + if (ret) { + input_free_device(ptr); + xenbus_dev_fatal(dev, ret, "input_register_device(ptr)"); + goto error; + } + info->ptr = ptr; + + ret = xenkbd_connect_backend(dev, info); + if (ret < 0) + goto error; + + return 0; + + error_nomem: + ret = -ENOMEM; + xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: + xenkbd_remove(dev); + return ret; +} + +static int xenkbd_resume(struct xenbus_device *dev) +{ + struct xenkbd_info *info = dev_get_drvdata(&dev->dev); + + xenkbd_disconnect_backend(info); + info->page->in_cons = info->page->in_prod = 0; + info->page->out_cons = info->page->out_prod = 0; + return xenkbd_connect_backend(dev, info); +} + +static int xenkbd_remove(struct xenbus_device *dev) +{ + struct xenkbd_info *info = dev_get_drvdata(&dev->dev); + + xenkbd_disconnect_backend(info); + input_unregister_device(info->kbd); + input_unregister_device(info->ptr); + free_page((unsigned long)info->page); + kfree(info); + return 0; +} + +static int xenkbd_connect_backend(struct xenbus_device *dev, + struct xenkbd_info *info) +{ + int ret; + struct xenbus_transaction xbt; + + ret = bind_listening_port_to_irqhandler( + dev->otherend_id, input_handler, 0, "xenkbd", info); + if (ret < 0) { + xenbus_dev_fatal(dev, ret, + "bind_listening_port_to_irqhandler"); + return ret; + } + info->irq = ret; + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + return ret; + } + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", + virt_to_mfn(info->page)); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + irq_to_evtchn_port(info->irq)); + if (ret) + goto error_xenbus; + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + return ret; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + return ret; +} + +static void xenkbd_disconnect_backend(struct xenkbd_info *info) +{ + if (info->irq >= 0) + unbind_from_irqhandler(info->irq, info); + info->irq = -1; +} + +static void xenkbd_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct xenkbd_info *info = dev_get_drvdata(&dev->dev); + int ret, val; + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateReconfiguring: + case XenbusStateReconfigured: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: + InitWait: + ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-abs-pointer", "%d", &val); + if (ret < 0) + val = 0; + if (val) { + ret = xenbus_printf(XBT_NIL, info->xbdev->nodename, + "request-abs-pointer", "1"); + if (ret) + ; /* FIXME */ + } + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateConnected: + /* + * Work around xenbus race condition: If backend goes + * through InitWait to Connected fast enough, we can + * get Connected twice here. 
+ */ + if (dev->state != XenbusStateConnected) + goto InitWait; /* no InitWait seen yet, fudge it */ + + /* Set input abs params to match backend screen res */ + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "width", "%d", &val) > 0 ) + input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0); + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "height", "%d", &val) > 0 ) + input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0); + + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static const struct xenbus_device_id xenkbd_ids[] = { + { "vkbd" }, + { "" } +}; +MODULE_ALIAS("xen:vkbd"); + +static struct xenbus_driver xenkbd_driver = { + .name = "vkbd", + .ids = xenkbd_ids, + .probe = xenkbd_probe, + .remove = xenkbd_remove, + .resume = xenkbd_resume, + .otherend_changed = xenkbd_backend_changed, +}; + +static int __init xenkbd_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + /* Nothing to do if running in dom0. */ + if (is_initial_xendomain()) + return -ENODEV; + + return xenbus_register_frontend(&xenkbd_driver); +} + +static void __exit xenkbd_cleanup(void) +{ + return xenbus_unregister_driver(&xenkbd_driver); +} + +module_init(xenkbd_init); +module_exit(xenkbd_cleanup); + +MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend"); +MODULE_LICENSE("GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/fbfront/xenfb.c +++ linux-ec2-2.6.31/drivers/xen/fbfront/xenfb.c @@ -0,0 +1,910 @@ +/* + * linux/drivers/video/xenfb.c -- Xen para-virtual frame buffer device + * + * Copyright (C) 2005-2006 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + * + * Based on linux/drivers/video/q40fb.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +/* + * TODO: + * + * Switch to grant tables when they become capable of dealing with the + * frame buffer. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct xenfb_mapping +{ + struct list_head link; + struct vm_area_struct *vma; + atomic_t map_refs; + int faults; + struct xenfb_info *info; +}; + +struct xenfb_info +{ + struct task_struct *kthread; + wait_queue_head_t wq; + + unsigned char *fb; + struct fb_info *fb_info; + struct timer_list refresh; + int dirty; + int x1, y1, x2, y2; /* dirty rectangle, + protected by dirty_lock */ + spinlock_t dirty_lock; + struct mutex mm_lock; + int nr_pages; + struct page **pages; + struct list_head mappings; /* protected by mm_lock */ + + int irq; + struct xenfb_page *page; + unsigned long *mfns; + int feature_resize; /* Backend has resize feature */ + struct xenfb_resize resize; + int resize_dpy; + spinlock_t resize_lock; + + struct xenbus_device *xbdev; +}; + +/* + * There are three locks: + * spinlock resize_lock protecting resize_dpy and resize + * spinlock dirty_lock protecting the dirty rectangle + * mutex mm_lock protecting mappings. + * + * How the dirty and mapping locks work together + * + * The problem is that dirty rectangle and mappings aren't + * independent: the dirty rectangle must cover all faulted pages in + * mappings. We need to prove that our locking maintains this + * invariant. + * + * There are several kinds of critical regions: + * + * 1. Holding only dirty_lock: xenfb_refresh(). May run in + * interrupts. Extends the dirty rectangle. Trivially preserves + * invariant. 
+ * + * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close(). Touch + * only mappings. The former creates unfaulted pages. Preserves + * invariant. The latter removes pages. Preserves invariant. + * + * 3. Holding both locks: xenfb_vm_fault(). Extends the dirty + * rectangle and updates mappings consistently. Preserves + * invariant. + * + * 4. The ugliest one: xenfb_update_screen(). Clear the dirty + * rectangle and update mappings consistently. + * + * We can't simply hold both locks, because zap_page_range() cannot + * be called with a spinlock held. + * + * Therefore, we first clear the dirty rectangle with both locks + * held. Then we unlock dirty_lock and update the mappings. + * Critical regions that hold only dirty_lock may interfere with + * that. This can only be region 1: xenfb_refresh(). But that + * just extends the dirty rectangle, which can't harm the + * invariant. + * + * But FIXME: the invariant is too weak. It misses that the fault + * record in mappings must be consistent with the mapping of pages in + * the associated address space! __do_fault() updates the PTE after + * xenfb_vm_fault() returns, i.e. outside the critical region. This + * allows the following race: + * + * X writes to some address in the Xen frame buffer + * Fault - call __do_fault() + * call xenfb_vm_fault() + * grab mm_lock + * map->faults++; + * release mm_lock + * return back to do_no_page() + * (preempted, or SMP) + * Xen worker thread runs. + * grab mm_lock + * look at mappings + * find this mapping, zaps its pages (but page not in pte yet) + * clear map->faults + * releases mm_lock + * (back to X process) + * put page in X's pte + * + * Oh well, we wont be updating the writes to this page anytime soon. + */ +#define MB_ (1024*1024) +#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8) + +enum {KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT}; +static int video[KPARAM_CNT] = {2, XENFB_WIDTH, XENFB_HEIGHT}; +module_param_array(video, int, NULL, 0); +MODULE_PARM_DESC(video, + "Size of video memory in MB and width,height in pixels, default = (2,800,600)"); + +static int xenfb_fps = 20; + +static int xenfb_remove(struct xenbus_device *); +static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); +static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); +static void xenfb_disconnect_backend(struct xenfb_info *); + +static void xenfb_send_event(struct xenfb_info *info, + union xenfb_out_event *event) +{ + __u32 prod; + + prod = info->page->out_prod; + /* caller ensures !xenfb_queue_full() */ + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(info->page, prod) = *event; + wmb(); /* ensure ring contents visible */ + info->page->out_prod = prod + 1; + + notify_remote_via_irq(info->irq); +} + +static void xenfb_do_update(struct xenfb_info *info, + int x, int y, int w, int h) +{ + union xenfb_out_event event; + + memset(&event, 0, sizeof(event)); + event.type = XENFB_TYPE_UPDATE; + event.update.x = x; + event.update.y = y; + event.update.width = w; + event.update.height = h; + + /* caller ensures !xenfb_queue_full() */ + xenfb_send_event(info, &event); +} + +static void xenfb_do_resize(struct xenfb_info *info) +{ + union xenfb_out_event event; + + memset(&event, 0, sizeof(event)); + event.resize = info->resize; + + /* caller ensures !xenfb_queue_full() */ + xenfb_send_event(info, &event); +} + +static int xenfb_queue_full(struct xenfb_info *info) +{ + __u32 cons, prod; + + prod = info->page->out_prod; + cons = 
info->page->out_cons; + return prod - cons == XENFB_OUT_RING_LEN; +} + +static void xenfb_update_screen(struct xenfb_info *info) +{ + unsigned long flags; + int y1, y2, x1, x2; + struct xenfb_mapping *map; + + if (xenfb_queue_full(info)) + return; + + mutex_lock(&info->mm_lock); + + spin_lock_irqsave(&info->dirty_lock, flags); + if (info->dirty){ + info->dirty = 0; + y1 = info->y1; + y2 = info->y2; + x1 = info->x1; + x2 = info->x2; + info->x1 = info->y1 = INT_MAX; + info->x2 = info->y2 = 0; + } else { + spin_unlock_irqrestore(&info->dirty_lock, flags); + mutex_unlock(&info->mm_lock); + return; + } + spin_unlock_irqrestore(&info->dirty_lock, flags); + + list_for_each_entry(map, &info->mappings, link) { + if (!map->faults) + continue; + zap_page_range(map->vma, map->vma->vm_start, + map->vma->vm_end - map->vma->vm_start, NULL); + map->faults = 0; + } + + mutex_unlock(&info->mm_lock); + + if (x2 < x1 || y2 < y1) { + printk("xenfb_update_screen bogus rect %d %d %d %d\n", + x1, x2, y1, y2); + WARN_ON(1); + } + xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1); +} + +static void xenfb_handle_resize_dpy(struct xenfb_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&info->resize_lock, flags); + if (info->resize_dpy) { + if (!xenfb_queue_full(info)) { + info->resize_dpy = 0; + xenfb_do_resize(info); + } + } + spin_unlock_irqrestore(&info->resize_lock, flags); +} + +static int xenfb_thread(void *data) +{ + struct xenfb_info *info = data; + + while (!kthread_should_stop()) { + xenfb_handle_resize_dpy(info); + xenfb_update_screen(info); + wait_event_interruptible(info->wq, + kthread_should_stop() || info->dirty); + try_to_freeze(); + } + return 0; +} + +static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green, + unsigned blue, unsigned transp, + struct fb_info *info) +{ + u32 v; + + if (regno > info->cmap.len) + return 1; + + red >>= (16 - info->var.red.length); + green >>= (16 - info->var.green.length); + blue >>= (16 - info->var.blue.length); + + v = (red << info->var.red.offset) | + (green << info->var.green.offset) | + (blue << info->var.blue.offset); + + /* FIXME is this sane? check against xxxfb_setcolreg()! 
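+	 *
+	 * Illustrative check (the numbers here are ours, not from the
+	 * original driver): with the 8/8/8 truecolor layout set up in
+	 * xenfb_probe() (red.offset = 16, green.offset = 8,
+	 * blue.offset = 0, each field 8 bits wide), a 16-bit-per-channel
+	 * pure red collapses to
+	 *
+	 *	red >>= 16 - 8;		0xffff -> 0xff
+	 *	v    = 0xff << 16;	0x00ff0000
+	 *
+	 * which is the packed pixel value the console layer later reads
+	 * back out of pseudo_palette.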
*/ + switch (info->var.bits_per_pixel) { + case 16: + case 24: + case 32: + ((u32 *)info->pseudo_palette)[regno] = v; + break; + } + + return 0; +} + +static void xenfb_timer(unsigned long data) +{ + struct xenfb_info *info = (struct xenfb_info *)data; + wake_up(&info->wq); +} + +static void __xenfb_refresh(struct xenfb_info *info, + int x1, int y1, int w, int h) +{ + int y2, x2; + + y2 = y1 + h; + x2 = x1 + w; + + if (info->y1 > y1) + info->y1 = y1; + if (info->y2 < y2) + info->y2 = y2; + if (info->x1 > x1) + info->x1 = x1; + if (info->x2 < x2) + info->x2 = x2; + info->dirty = 1; + + if (timer_pending(&info->refresh)) + return; + + mod_timer(&info->refresh, jiffies + HZ/xenfb_fps); +} + +static void xenfb_refresh(struct xenfb_info *info, + int x1, int y1, int w, int h) +{ + unsigned long flags; + + spin_lock_irqsave(&info->dirty_lock, flags); + __xenfb_refresh(info, x1, y1, w, h); + spin_unlock_irqrestore(&info->dirty_lock, flags); +} + +static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect) +{ + struct xenfb_info *info = p->par; + + cfb_fillrect(p, rect); + xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height); +} + +static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image) +{ + struct xenfb_info *info = p->par; + + cfb_imageblit(p, image); + xenfb_refresh(info, image->dx, image->dy, image->width, image->height); +} + +static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) +{ + struct xenfb_info *info = p->par; + + cfb_copyarea(p, area); + xenfb_refresh(info, area->dx, area->dy, area->width, area->height); +} + +static void xenfb_vm_open(struct vm_area_struct *vma) +{ + struct xenfb_mapping *map = vma->vm_private_data; + atomic_inc(&map->map_refs); +} + +static void xenfb_vm_close(struct vm_area_struct *vma) +{ + struct xenfb_mapping *map = vma->vm_private_data; + struct xenfb_info *info = map->info; + + mutex_lock(&info->mm_lock); + if (atomic_dec_and_test(&map->map_refs)) { + list_del(&map->link); + kfree(map); + } + mutex_unlock(&info->mm_lock); +} + +static int xenfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct xenfb_mapping *map = vma->vm_private_data; + struct xenfb_info *info = map->info; + int pgnr = ((long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT; + unsigned long flags; + struct page *page; + int y1, y2; + + if (pgnr >= info->nr_pages) + return VM_FAULT_SIGBUS; + + mutex_lock(&info->mm_lock); + spin_lock_irqsave(&info->dirty_lock, flags); + page = info->pages[pgnr]; + get_page(page); + map->faults++; + + y1 = pgnr * PAGE_SIZE / info->fb_info->fix.line_length; + y2 = (pgnr * PAGE_SIZE + PAGE_SIZE - 1) / info->fb_info->fix.line_length; + if (y2 > info->fb_info->var.yres) + y2 = info->fb_info->var.yres; + __xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1); + spin_unlock_irqrestore(&info->dirty_lock, flags); + mutex_unlock(&info->mm_lock); + + vmf->page = page; + + return VM_FAULT_MINOR; +} + +static struct vm_operations_struct xenfb_vm_ops = { + .open = xenfb_vm_open, + .close = xenfb_vm_close, + .fault = xenfb_vm_fault, +}; + +static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma) +{ + struct xenfb_info *info = fb_info->par; + struct xenfb_mapping *map; + int map_pages; + + if (!(vma->vm_flags & VM_WRITE)) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (vma->vm_pgoff != 0) + return -EINVAL; + + map_pages = (vma->vm_end - vma->vm_start + PAGE_SIZE-1) >> PAGE_SHIFT; + if (map_pages > info->nr_pages) + return 
-EINVAL; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) + return -ENOMEM; + + map->vma = vma; + map->faults = 0; + map->info = info; + atomic_set(&map->map_refs, 1); + + mutex_lock(&info->mm_lock); + list_add(&map->link, &info->mappings); + mutex_unlock(&info->mm_lock); + + vma->vm_ops = &xenfb_vm_ops; + vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED); + vma->vm_private_data = map; + + return 0; +} + +static int +xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + int required_mem_len; + + xenfb_info = info->par; + + if (!xenfb_info->feature_resize) { + if (var->xres == video[KPARAM_WIDTH] && + var->yres == video[KPARAM_HEIGHT] && + var->bits_per_pixel == xenfb_info->page->depth) { + return 0; + } + return -EINVAL; + } + + /* Can't resize past initial width and height */ + if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT]) + return -EINVAL; + + required_mem_len = var->xres * var->yres * (xenfb_info->page->depth / 8); + if (var->bits_per_pixel == xenfb_info->page->depth && + var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) && + required_mem_len <= info->fix.smem_len) { + var->xres_virtual = var->xres; + var->yres_virtual = var->yres; + return 0; + } + return -EINVAL; +} + +static int xenfb_set_par(struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + unsigned long flags; + + xenfb_info = info->par; + + spin_lock_irqsave(&xenfb_info->resize_lock, flags); + xenfb_info->resize.type = XENFB_TYPE_RESIZE; + xenfb_info->resize.width = info->var.xres; + xenfb_info->resize.height = info->var.yres; + xenfb_info->resize.stride = info->fix.line_length; + xenfb_info->resize.depth = info->var.bits_per_pixel; + xenfb_info->resize.offset = 0; + xenfb_info->resize_dpy = 1; + spin_unlock_irqrestore(&xenfb_info->resize_lock, flags); + return 0; +} + +static struct fb_ops xenfb_fb_ops = { + .owner = THIS_MODULE, + .fb_setcolreg = xenfb_setcolreg, + .fb_fillrect = xenfb_fillrect, + .fb_copyarea = xenfb_copyarea, + .fb_imageblit = xenfb_imageblit, + .fb_mmap = xenfb_mmap, + .fb_check_var = xenfb_check_var, + .fb_set_par = xenfb_set_par, +}; + +static irqreturn_t xenfb_event_handler(int rq, void *dev_id) +{ + /* + * No in events recognized, simply ignore them all. + * If you need to recognize some, see xenbkd's input_handler() + * for how to do that. 
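+	 *
+	 * What the code below does do: advance in_cons straight to
+	 * in_prod, discarding every incoming event, and notify the
+	 * backend again so the in ring can never fill up.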
+ */ + struct xenfb_info *info = dev_id; + struct xenfb_page *page = info->page; + + if (page->in_cons != page->in_prod) { + info->page->in_cons = info->page->in_prod; + notify_remote_via_irq(info->irq); + } + return IRQ_HANDLED; +} + +static unsigned long vmalloc_to_mfn(void *address) +{ + return pfn_to_mfn(vmalloc_to_pfn(address)); +} + +static __devinit void +xenfb_make_preferred_console(void) +{ + struct console *c; + + if (console_set_on_cmdline) + return; + + acquire_console_sem(); + for (c = console_drivers; c; c = c->next) { + if (!strcmp(c->name, "tty") && c->index == 0) + break; + } + release_console_sem(); + if (c) { + unregister_console(c); + c->flags |= CON_CONSDEV; + c->flags &= ~CON_PRINTBUFFER; /* don't print again */ + register_console(c); + } +} + +static int __devinit xenfb_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct xenfb_info *info; + struct fb_info *fb_info; + int fb_size; + int val; + int ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + + /* Limit kernel param videoram amount to what is in xenstore */ + if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) { + if (val < video[KPARAM_MEM]) + video[KPARAM_MEM] = val; + } + + /* If requested res does not fit in available memory, use default */ + fb_size = video[KPARAM_MEM] * MB_; + if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH/8 > fb_size) { + video[KPARAM_WIDTH] = XENFB_WIDTH; + video[KPARAM_HEIGHT] = XENFB_HEIGHT; + fb_size = XENFB_DEFAULT_FB_LEN; + } + + dev_set_drvdata(&dev->dev, info); + info->xbdev = dev; + info->irq = -1; + info->x1 = info->y1 = INT_MAX; + spin_lock_init(&info->dirty_lock); + spin_lock_init(&info->resize_lock); + mutex_init(&info->mm_lock); + init_waitqueue_head(&info->wq); + init_timer(&info->refresh); + info->refresh.function = xenfb_timer; + info->refresh.data = (unsigned long)info; + INIT_LIST_HEAD(&info->mappings); + + info->fb = vmalloc(fb_size); + if (info->fb == NULL) + goto error_nomem; + memset(info->fb, 0, fb_size); + + info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + info->pages = kmalloc(sizeof(struct page *) * info->nr_pages, + GFP_KERNEL); + if (info->pages == NULL) + goto error_nomem; + + info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); + if (!info->mfns) + goto error_nomem; + + /* set up shared page */ + info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!info->page) + goto error_nomem; + + fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); + /* see fishy hackery below */ + if (fb_info == NULL) + goto error_nomem; + + /* FIXME fishy hackery */ + fb_info->pseudo_palette = fb_info->par; + fb_info->par = info; + /* /FIXME */ + fb_info->screen_base = info->fb; + + fb_info->fbops = &xenfb_fb_ops; + fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH]; + fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT]; + fb_info->var.bits_per_pixel = XENFB_DEPTH; + + fb_info->var.red = (struct fb_bitfield){16, 8, 0}; + fb_info->var.green = (struct fb_bitfield){8, 8, 0}; + fb_info->var.blue = (struct fb_bitfield){0, 8, 0}; + + fb_info->var.activate = FB_ACTIVATE_NOW; + fb_info->var.height = -1; + fb_info->var.width = -1; + fb_info->var.vmode = FB_VMODE_NONINTERLACED; + + fb_info->fix.visual = FB_VISUAL_TRUECOLOR; + fb_info->fix.line_length = fb_info->var.xres * (XENFB_DEPTH / 8); + fb_info->fix.smem_start = 0; + fb_info->fix.smem_len = 
fb_size; + strcpy(fb_info->fix.id, "xen"); + fb_info->fix.type = FB_TYPE_PACKED_PIXELS; + fb_info->fix.accel = FB_ACCEL_NONE; + + fb_info->flags = FBINFO_FLAG_DEFAULT; + + ret = fb_alloc_cmap(&fb_info->cmap, 256, 0); + if (ret < 0) { + framebuffer_release(fb_info); + xenbus_dev_fatal(dev, ret, "fb_alloc_cmap"); + goto error; + } + + xenfb_init_shared_page(info, fb_info); + + ret = register_framebuffer(fb_info); + if (ret) { + fb_dealloc_cmap(&info->fb_info->cmap); + framebuffer_release(fb_info); + xenbus_dev_fatal(dev, ret, "register_framebuffer"); + goto error; + } + info->fb_info = fb_info; + + ret = xenfb_connect_backend(dev, info); + if (ret < 0) + goto error; + + xenfb_make_preferred_console(); + return 0; + + error_nomem: + ret = -ENOMEM; + xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: + xenfb_remove(dev); + return ret; +} + +static int xenfb_resume(struct xenbus_device *dev) +{ + struct xenfb_info *info = dev_get_drvdata(&dev->dev); + + xenfb_disconnect_backend(info); + xenfb_init_shared_page(info, info->fb_info); + return xenfb_connect_backend(dev, info); +} + +static int xenfb_remove(struct xenbus_device *dev) +{ + struct xenfb_info *info = dev_get_drvdata(&dev->dev); + + del_timer(&info->refresh); + if (info->kthread) + kthread_stop(info->kthread); + xenfb_disconnect_backend(info); + if (info->fb_info) { + unregister_framebuffer(info->fb_info); + fb_dealloc_cmap(&info->fb_info->cmap); + framebuffer_release(info->fb_info); + } + free_page((unsigned long)info->page); + vfree(info->mfns); + kfree(info->pages); + vfree(info->fb); + kfree(info); + + return 0; +} + +static void xenfb_init_shared_page(struct xenfb_info *info, + struct fb_info * fb_info) +{ + int i; + int epd = PAGE_SIZE / sizeof(info->mfns[0]); + + for (i = 0; i < info->nr_pages; i++) + info->pages[i] = vmalloc_to_page(info->fb + i * PAGE_SIZE); + + for (i = 0; i < info->nr_pages; i++) + info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); + + for (i = 0; i * epd < info->nr_pages; i++) + info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]); + + info->page->width = fb_info->var.xres; + info->page->height = fb_info->var.yres; + info->page->depth = fb_info->var.bits_per_pixel; + info->page->line_length = fb_info->fix.line_length; + info->page->mem_length = fb_info->fix.smem_len; + info->page->in_cons = info->page->in_prod = 0; + info->page->out_cons = info->page->out_prod = 0; +} + +static int xenfb_connect_backend(struct xenbus_device *dev, + struct xenfb_info *info) +{ + int ret; + struct xenbus_transaction xbt; + + ret = bind_listening_port_to_irqhandler( + dev->otherend_id, xenfb_event_handler, 0, "xenfb", info); + if (ret < 0) { + xenbus_dev_fatal(dev, ret, + "bind_listening_port_to_irqhandler"); + return ret; + } + info->irq = ret; + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + return ret; + } + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", + virt_to_mfn(info->page)); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + irq_to_evtchn_port(info->irq)); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1"); + if (ret) + goto error_xenbus; + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + return ret; + } + + 
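+	/*
+	 * Note the transaction idiom above: xenbus_transaction_end(xbt, 0)
+	 * fails with -EAGAIN when another writer raced with us, and the
+	 * whole read/write sequence is simply replayed ("goto again").
+	 * Only a genuine error aborts the transaction via
+	 * xenbus_transaction_end(xbt, 1) at error_xenbus.
+	 */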
xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + return ret; +} + +static void xenfb_disconnect_backend(struct xenfb_info *info) +{ + if (info->irq >= 0) + unbind_from_irqhandler(info->irq, info); + info->irq = -1; +} + +static void xenfb_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct xenfb_info *info = dev_get_drvdata(&dev->dev); + int val; + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateReconfiguring: + case XenbusStateReconfigured: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: + InitWait: + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateConnected: + /* + * Work around xenbus race condition: If backend goes + * through InitWait to Connected fast enough, we can + * get Connected twice here. + */ + if (dev->state != XenbusStateConnected) + goto InitWait; /* no InitWait seen yet, fudge it */ + + + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-resize", "%d", &val) < 0) + val = 0; + info->feature_resize = val; + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "request-update", "%d", &val) < 0) + val = 0; + + if (val){ + info->kthread = kthread_run(xenfb_thread, info, + "xenfb thread"); + if (IS_ERR(info->kthread)) { + info->kthread = NULL; + xenbus_dev_fatal(dev, PTR_ERR(info->kthread), + "xenfb_thread"); + } + } + break; + + case XenbusStateClosing: + // FIXME is this safe in any dev->state? + xenbus_frontend_closed(dev); + break; + } +} + +static const struct xenbus_device_id xenfb_ids[] = { + { "vfb" }, + { "" } +}; +MODULE_ALIAS("xen:vfb"); + +static struct xenbus_driver xenfb_driver = { + .name = "vfb", + .ids = xenfb_ids, + .probe = xenfb_probe, + .remove = xenfb_remove, + .resume = xenfb_resume, + .otherend_changed = xenfb_backend_changed, +}; + +static int __init xenfb_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + /* Nothing to do if running in dom0. */ + if (is_initial_xendomain()) + return -ENODEV; + + return xenbus_register_frontend(&xenfb_driver); +} + +static void __exit xenfb_cleanup(void) +{ + return xenbus_unregister_driver(&xenfb_driver); +} + +module_init(xenfb_init); +module_exit(xenfb_cleanup); + +MODULE_DESCRIPTION("Xen virtual framebuffer device frontend"); +MODULE_LICENSE("GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/fbfront/Makefile +++ linux-ec2-2.6.31/drivers/xen/fbfront/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_XEN_FRAMEBUFFER) := xenfb.o +obj-$(CONFIG_XEN_KEYBOARD) += xenkbd.o --- linux-ec2-2.6.31.orig/drivers/xen/blktap/interface.c +++ linux-ec2-2.6.31/drivers/xen/blktap/interface.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * drivers/xen/blktap/interface.c + * + * Block-device interface management. 
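+ *
+ * In brief: allocates blkif_t instances from a slab cache, maps the
+ * frontend's shared ring page through the grant-table interface and
+ * binds the interdomain event channel whose handler,
+ * tap_blkif_be_int(), kicks the tapdisk scheduler thread.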
+ *
+ * Copyright (c) 2004, Keir Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+
+ */
+
+#include "common.h"
+#include <xen/evtchn.h>
+
+static struct kmem_cache *blkif_cachep;
+
+blkif_t *tap_alloc_blkif(domid_t domid)
+{
+	blkif_t *blkif;
+
+	blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+	if (!blkif)
+		return ERR_PTR(-ENOMEM);
+
+	memset(blkif, 0, sizeof(*blkif));
+	blkif->domid = domid;
+	spin_lock_init(&blkif->blk_ring_lock);
+	atomic_set(&blkif->refcnt, 1);
+	init_waitqueue_head(&blkif->wq);
+	blkif->st_print = jiffies;
+	init_waitqueue_head(&blkif->waiting_to_free);
+
+	return blkif;
+}
+
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
+{
+	struct gnttab_map_grant_ref op;
+
+	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+			  GNTMAP_host_map, shared_page, blkif->domid);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+		BUG();
+
+	if (op.status) {
+		DPRINTK(" Grant table operation failure !\n");
+		return op.status;
+	}
+
+	blkif->shmem_ref = shared_page;
+	blkif->shmem_handle = op.handle;
+
+	return 0;
+}
+
+static void unmap_frontend_page(blkif_t *blkif)
+{
+	struct gnttab_unmap_grant_ref op;
+
+	gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+			    GNTMAP_host_map, blkif->shmem_handle);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+		BUG();
+}
+
+int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
+		  unsigned int evtchn)
+{
+	int err;
+
+	/* Already connected through?
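+	 * tap_blkif_map() can run again on reconnect; a non-zero
+	 * blkif->irq means the ring is already mapped and the event
+	 * channel already bound, so there is nothing to redo.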
*/ + if (blkif->irq) + return 0; + + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkif_sring_t *sring; + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkif_x86_32_sring_t *sring_x86_32; + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkif_x86_64_sring_t *sring_x86_64; + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, tap_blkif_be_int, + 0, "blkif-backend", blkif); + if (err < 0) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void tap_blkif_unmap(blkif_t *blkif) +{ + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void tap_blkif_free(blkif_t *blkif) +{ + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + tap_blkif_unmap(blkif); +} + +void tap_blkif_kmem_cache_free(blkif_t *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +void __init tap_blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t), + 0, 0, NULL); +} --- linux-ec2-2.6.31.orig/drivers/xen/blktap/blocktap.c +++ linux-ec2-2.6.31/drivers/xen/blktap/blocktap.c @@ -0,0 +1 @@ +#include "blktap.c" --- linux-ec2-2.6.31.orig/drivers/xen/blktap/blktap.c +++ linux-ec2-2.6.31/drivers/xen/blktap/blktap.c @@ -0,0 +1,1783 @@ +/****************************************************************************** + * drivers/xen/blktap/blktap.c + * + * Back-end driver for user level virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. Requests + * are remapped to a user-space memory region. + * + * Based on the blkback driver code. + * + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield + * + * Clean ups and fix ups: + * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "common.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */ +#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */ + +/* + * The maximum number of requests that can be outstanding at any time + * is determined by + * + * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] + * + * where mmap_alloc < MAX_DYNAMIC_MEM. + * + * TODO: + * mmap_alloc is initialised to 2 and should be adjustable on the fly via + * sysfs. + */ +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) +#define MAX_DYNAMIC_MEM BLK_RING_SIZE +#define MAX_PENDING_REQS BLK_RING_SIZE +#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) +#define MMAP_VADDR(_start, _req,_seg) \ + (_start + \ + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) +static int blkif_reqs = MAX_PENDING_REQS; +static int mmap_pages = MMAP_PAGES; + +#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we + * have a bunch of pages reserved for shared + * memory rings. + */ + +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ +typedef struct domid_translate { + unsigned short domid; + unsigned short busid; +} domid_translate_t ; + +typedef struct domid_translate_ext { + unsigned short domid; + u32 busid; +} domid_translate_ext_t ; + +/*Data struct associated with each of the tapdisk devices*/ +typedef struct tap_blkif { + struct mm_struct *mm; /*User address space */ + unsigned long rings_vstart; /*Kernel memory mapping */ + unsigned long user_vstart; /*User memory mapping */ + unsigned long dev_inuse; /*One process opens device at a time. */ + unsigned long dev_pending; /*In process of being opened */ + unsigned long ring_ok; /*make this ring->state */ + blkif_front_ring_t ufe_ring; /*Rings up to user space. 
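+					 The kernel is the *front end*
+					 of this ring: requests are
+					 pushed to tapdisk through it
+					 (see RING_PUSH_REQUESTS() in
+					 blktap_poll()) and responses
+					 come back through
+					 blktap_read_ufe_ring().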
*/ + wait_queue_head_t wait; /*for poll */ + unsigned long mode; /*current switching mode */ + int minor; /*Minor number for tapdisk device */ + pid_t pid; /*tapdisk process id */ + struct pid_namespace *pid_ns; /*... and its corresponding namespace */ + enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace + shutdown */ + unsigned long *idx_map; /*Record the user ring id to kern + [req id, idx] tuple */ + blkif_t *blkif; /*Associate blkif with tapdev */ + struct domid_translate_ext trans; /*Translation from domid to bus. */ + struct vm_foreign_map foreign_map; /*Mapping page */ +} tap_blkif_t; + +static struct tap_blkif *tapfds[MAX_TAP_DEV]; +static int blktap_next_minor; + +module_param(blkif_reqs, int, 0); +/* Run-time switchable: /sys/module/blktap/parameters/ */ +static unsigned int log_stats = 0; +static unsigned int debug_lvl = 0; +module_param(log_stats, int, 0644); +module_param(debug_lvl, int, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +typedef struct { + blkif_t *blkif; + u64 id; + unsigned short mem_idx; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; + int inuse; +} pending_req_t; + +static pending_req_t *pending_reqs[MAX_PENDING_REQS]; +static struct list_head pending_free; +static DEFINE_SPINLOCK(pending_free_lock); +static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq); +static int alloc_pending_reqs; + +typedef unsigned int PEND_RING_IDX; + +static inline int MASK_PEND_IDX(int i) { + return (i & (MAX_PENDING_REQS-1)); +} + +static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) { + return (req - pending_reqs[idx]); +} + +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +#define BLKBACK_INVALID_HANDLE (~0) + +static struct page **foreign_pages[MAX_DYNAMIC_MEM]; +static inline unsigned long idx_to_kaddr( + unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx) +{ + unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx; + unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +static unsigned short mmap_alloc = 0; +static unsigned short mmap_lock = 0; +static unsigned short mmap_inuse = 0; + +/****************************************************************** + * GRANT HANDLES + */ + +/* When using grant tables to map a frame for device access then the + * handle returned must be used to unmap the frame. This is needed to + * drop the ref count on the frame. 
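+ *
+ * blktap maps every granted frame twice -- into the kernel with
+ * GNTMAP_host_map and into the tapdisk process with
+ * GNTMAP_application_map | GNTMAP_contains_pte -- hence a *pair* of
+ * handles per slot.  A condensed sketch of the release path (both
+ * handles must go back to Xen, cf. fast_flush_area() below):
+ *
+ *	gnttab_set_unmap_op(&op[0], kvaddr, GNTMAP_host_map, h->kernel);
+ *	gnttab_set_unmap_op(&op[1], ptep, GNTMAP_host_map |
+ *			    GNTMAP_application_map | GNTMAP_contains_pte,
+ *			    h->user);
+ *	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, 2);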
+ */ +struct grant_handle_pair +{ + grant_handle_t kernel; + grant_handle_t user; +}; +#define INVALID_GRANT_HANDLE 0xFFFF + +static struct grant_handle_pair + pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES]; +#define pending_handle(_id, _idx, _i) \ + (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \ + + (_i)]) + + +static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/ + +#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */ +#define BLKTAP_DEV_DIR "/dev/xen" + +static int blktap_major; + +/* blktap IOCTLs: */ +#define BLKTAP_IOCTL_KICK_FE 1 +#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */ +#define BLKTAP_IOCTL_SETMODE 3 +#define BLKTAP_IOCTL_SENDPID 4 +#define BLKTAP_IOCTL_NEWINTF 5 +#define BLKTAP_IOCTL_MINOR 6 +#define BLKTAP_IOCTL_MAJOR 7 +#define BLKTAP_QUERY_ALLOC_REQS 8 +#define BLKTAP_IOCTL_FREEINTF 9 +#define BLKTAP_IOCTL_NEWINTF_EXT 50 +#define BLKTAP_IOCTL_PRINT_IDXS 100 + +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */ + +#define BLKTAP_MODE_INTERPOSE \ + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) + + +static inline int BLKTAP_MODE_VALID(unsigned long arg) +{ + return ((arg == BLKTAP_MODE_PASSTHROUGH ) || + (arg == BLKTAP_MODE_INTERCEPT_FE) || + (arg == BLKTAP_MODE_INTERPOSE )); +} + +/* Requests passing through the tap to userspace are re-assigned an ID. + * We must record a mapping between the BE [IDX,ID] tuple and the userspace + * ring ID. + */ + +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx) +{ + return ((fe_dom << 16) | MASK_PEND_IDX(idx)); +} + +extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) +{ + return (PEND_RING_IDX)(id & 0x0000ffff); +} + +extern inline int ID_TO_MIDX(unsigned long id) +{ + return (int)(id >> 16); +} + +#define INVALID_REQ 0xdead0000 + +/*TODO: Convert to a free list*/ +static inline int GET_NEXT_REQ(unsigned long *idx_map) +{ + int i; + for (i = 0; i < MAX_PENDING_REQS; i++) + if (idx_map[i] == INVALID_REQ) + return i; + + return INVALID_REQ; +} + +static inline int OFFSET_TO_USR_IDX(int offset) +{ + return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST; +} + +static inline int OFFSET_TO_SEG(int offset) +{ + return offset % BLKIF_MAX_SEGMENTS_PER_REQUEST; +} + + +#define BLKTAP_INVALID_HANDLE(_g) \ + (((_g->kernel) == INVALID_GRANT_HANDLE) && \ + ((_g->user) == INVALID_GRANT_HANDLE)) + +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ + (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \ + } while(0) + + +/****************************************************************** + * BLKTAP VM OPS + */ + +static int blktap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + /* + * if the page has not been mapped in by the driver then return + * VM_FAULT_SIGBUS to the domain. 
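+	 *
+	 * Pages backing a request are pre-installed in the foreign map
+	 * by dispatch_rw_block_io() and found through the VM_FOREIGN
+	 * path of get_user_pages(), so a fault that actually reaches
+	 * this handler means userspace touched a slot with no request
+	 * mapped behind it.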
+ */ + + return VM_FAULT_SIGBUS; +} + +static pte_t blktap_clear_pte(struct vm_area_struct *vma, + unsigned long uvaddr, + pte_t *ptep, int is_fullmm) +{ + pte_t copy; + tap_blkif_t *info = NULL; + int offset, seg, usr_idx, pending_idx, mmap_idx; + unsigned long uvstart = 0; + unsigned long kvaddr; + struct page *pg; + struct grant_handle_pair *khandle; + struct gnttab_unmap_grant_ref unmap[2]; + int count = 0; + + /* + * If the address is before the start of the grant mapped region or + * if vm_file is NULL (meaning mmap failed and we have nothing to do) + */ + if (vma->vm_file != NULL) { + info = vma->vm_file->private_data; + uvstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); + } + if (vma->vm_file == NULL || uvaddr < uvstart) + return xen_ptep_get_and_clear_full(vma, uvaddr, ptep, + is_fullmm); + + /* TODO Should these be changed to if statements? */ + BUG_ON(!info); + BUG_ON(!info->idx_map); + + offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT); + usr_idx = OFFSET_TO_USR_IDX(offset); + seg = OFFSET_TO_SEG(offset); + + pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); + mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); + + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg); + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + ClearPageReserved(pg); + info->foreign_map.map[offset + RING_PAGES] = NULL; + + khandle = &pending_handle(mmap_idx, pending_idx, seg); + + if (khandle->kernel != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&unmap[count], kvaddr, + GNTMAP_host_map, khandle->kernel); + count++; + + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + INVALID_P2M_ENTRY); + } + + if (khandle->user != INVALID_GRANT_HANDLE) { + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); + + copy = *ptep; + gnttab_set_unmap_op(&unmap[count], ptep_to_machine(ptep), + GNTMAP_host_map + | GNTMAP_application_map + | GNTMAP_contains_pte, + khandle->user); + count++; + } else { + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap)); + + /* USING SHADOW PAGE TABLES. */ + copy = xen_ptep_get_and_clear_full(vma, uvaddr, ptep, + is_fullmm); + } + + if (count) { + BLKTAP_INVALIDATE_HANDLE(khandle); + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, count)) + BUG(); + } + + return copy; +} + +static void blktap_vma_open(struct vm_area_struct *vma) +{ + tap_blkif_t *info; + if (vma->vm_file == NULL) + return; + + info = vma->vm_file->private_data; + vma->vm_private_data = + &info->foreign_map.map[(vma->vm_start - info->rings_vstart) >> PAGE_SHIFT]; +} + +/* tricky part + * When partial munmapping, ->open() is called only splitted vma which + * will be released soon. * See split_vma() and do_munmap() in mm/mmap.c + * So there is no chance to fix up vm_private_data of the end vma. 
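+ *
+ * Illustration (addresses are made up): a munmap of [A, B) out of a
+ * mapping [A, C) splits it at B; the surviving [B, C) vma still
+ * carries the old vm_private_data and would index the wrong slot of
+ * the foreign map.  blktap_vma_close() below therefore repairs the
+ * *next* vma at the moment the doomed piece is torn down.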
+ */ +static void blktap_vma_close(struct vm_area_struct *vma) +{ + tap_blkif_t *info; + struct vm_area_struct *next = vma->vm_next; + + if (next == NULL || + vma->vm_ops != next->vm_ops || + vma->vm_end != next->vm_start || + vma->vm_file == NULL || + vma->vm_file != next->vm_file) + return; + + info = vma->vm_file->private_data; + next->vm_private_data = + &info->foreign_map.map[(next->vm_start - info->rings_vstart) >> PAGE_SHIFT]; +} + +static struct vm_operations_struct blktap_vm_ops = { + fault: blktap_fault, + zap_pte: blktap_clear_pte, + open: blktap_vma_open, + close: blktap_vma_close, +}; + +/****************************************************************** + * BLKTAP FILE OPS + */ + +/*Function Declarations*/ +static tap_blkif_t *get_next_free_dev(void); +static int blktap_open(struct inode *inode, struct file *filp); +static int blktap_release(struct inode *inode, struct file *filp); +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma); +static int blktap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +static unsigned int blktap_poll(struct file *file, poll_table *wait); + +static const struct file_operations blktap_fops = { + .owner = THIS_MODULE, + .poll = blktap_poll, + .ioctl = blktap_ioctl, + .open = blktap_open, + .release = blktap_release, + .mmap = blktap_mmap, +}; + + +static tap_blkif_t *get_next_free_dev(void) +{ + struct class *class; + tap_blkif_t *info; + int minor; + + /* + * This is called only from the ioctl, which + * means we should always have interrupts enabled. + */ + BUG_ON(irqs_disabled()); + + spin_lock_irq(&pending_free_lock); + + /* tapfds[0] is always NULL */ + + for (minor = 1; minor < blktap_next_minor; minor++) { + info = tapfds[minor]; + /* we could have failed a previous attempt. */ + if (!info || + ((!test_bit(0, &info->dev_inuse)) && + (info->dev_pending == 0)) ) { + info->dev_pending = 1; + goto found; + } + } + info = NULL; + minor = -1; + + /* + * We didn't find free device. If we can still allocate + * more, then we grab the next device minor that is + * available. This is done while we are still under + * the protection of the pending_free_lock. + */ + if (blktap_next_minor < MAX_TAP_DEV) + minor = blktap_next_minor++; +found: + spin_unlock_irq(&pending_free_lock); + + if (!info && minor > 0) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (unlikely(!info)) { + /* + * If we failed here, try to put back + * the next minor number. But if one + * was just taken, then we just lose this + * minor. We can try to allocate this + * minor again later. + */ + spin_lock_irq(&pending_free_lock); + if (blktap_next_minor == minor+1) + blktap_next_minor--; + spin_unlock_irq(&pending_free_lock); + goto out; + } + + info->minor = minor; + /* + * Make sure that we have a minor before others can + * see us. 
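+	 * The wmb() below orders the info->minor store before the
+	 * tapfds[minor] publication: lookups such as blktap_open() and
+	 * do_block_io_op() reach the structure only through tapfds[],
+	 * and must never observe it half-initialised.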
+ */ + wmb(); + tapfds[minor] = info; + + if ((class = get_xen_class()) != NULL) + device_create(class, NULL, MKDEV(blktap_major, minor), + NULL, "blktap%d", minor); + } + +out: + return info; +} + +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) +{ + tap_blkif_t *info; + int i; + + for (i = 1; i < blktap_next_minor; i++) { + info = tapfds[i]; + if ( info && + (info->trans.domid == domid) && + (info->trans.busid == xenbus_id) ) { + info->blkif = blkif; + info->status = RUNNING; + return i; + } + } + return -1; +} + +void signal_tapdisk(int idx) +{ + tap_blkif_t *info; + struct task_struct *ptask; + + /* + * if the userland tools set things up wrong, this could be negative; + * just don't try to signal in this case + */ + if (idx < 0) + return; + + info = tapfds[idx]; + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) + return; + + if (info->pid > 0) { + ptask = pid_task(find_pid_ns(info->pid, info->pid_ns), + PIDTYPE_PID); + if (ptask) + info->status = CLEANSHUTDOWN; + } + info->blkif = NULL; + + return; +} + +static int blktap_open(struct inode *inode, struct file *filp) +{ + blkif_sring_t *sring; + int idx = iminor(inode) - BLKTAP_MINOR; + tap_blkif_t *info; + int i; + + /* ctrl device, treat differently */ + if (!idx) + return 0; + + info = tapfds[idx]; + + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) { + WPRINTK("Unable to open device /dev/xen/blktap%d\n", + idx); + return -ENODEV; + } + + DPRINTK("Opening device /dev/xen/blktap%d\n",idx); + + /*Only one process can access device at a time*/ + if (test_and_set_bit(0, &info->dev_inuse)) + return -EBUSY; + + info->dev_pending = 0; + + /* Allocate the fe ring. */ + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) + goto fail_nomem; + + SetPageReserved(virt_to_page(sring)); + + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE); + + filp->private_data = info; + info->mm = NULL; + + info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, + GFP_KERNEL); + + if (info->idx_map == NULL) + goto fail_nomem; + + if (idx > 0) { + init_waitqueue_head(&info->wait); + for (i = 0; i < MAX_PENDING_REQS; i++) + info->idx_map[i] = INVALID_REQ; + } + + DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx); + return 0; + + fail_nomem: + return -ENOMEM; +} + +static int blktap_release(struct inode *inode, struct file *filp) +{ + tap_blkif_t *info = filp->private_data; + + /* check for control device */ + if (!info) + return 0; + + info->ring_ok = 0; + smp_wmb(); + + mmput(info->mm); + info->mm = NULL; + kfree(info->foreign_map.map); + info->foreign_map.map = NULL; + + /* Free the ring page. */ + ClearPageReserved(virt_to_page(info->ufe_ring.sring)); + free_page((unsigned long) info->ufe_ring.sring); + + if (info->idx_map) { + kfree(info->idx_map); + info->idx_map = NULL; + } + + if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) { + if (info->blkif->xenblkd != NULL) { + kthread_stop(info->blkif->xenblkd); + info->blkif->xenblkd = NULL; + } + info->status = CLEANSHUTDOWN; + } + + clear_bit(0, &info->dev_inuse); + DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor); + + return 0; +} + + +/* Note on mmap: + * We need to map pages to user space in a way that will allow the block + * subsystem set up direct IO to them. This couldn't be done before, because + * there isn't really a sane way to translate a user virtual address down to a + * physical address when the page belongs to another domain. 
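+ *
+ * For illustration, what get_user_pages() effectively does for a
+ * VM_FOREIGN vma boils down to:
+ *
+ *	struct vm_foreign_map *fmap = vma->vm_private_data;
+ *	struct page *pg =
+ *		fmap->map[(addr - vma->vm_start) >> PAGE_SHIFT];
+ *
+ * which is exactly the table blktap_mmap() below fills in.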
+ * + * My first approach was to map the page in to kernel memory, add an entry + * for it in the physical frame list (using alloc_lomem_region as in blkback) + * and then attempt to map that page up to user space. This is disallowed + * by xen though, which realizes that we don't really own the machine frame + * underlying the physical page. + * + * The new approach is to provide explicit support for this in xen linux. + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages + * mapped from other vms. vma->vm_private_data is set up as a mapping + * from pages to actual page structs. There is a new clause in get_user_pages + * that does the right thing for this sort of mapping. + */ +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int size; + tap_blkif_t *info = filp->private_data; + int ret; + + if (info == NULL) { + WPRINTK("blktap: mmap, retrieving idx failed\n"); + return -ENOMEM; + } + + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &blktap_vm_ops; + + size = vma->vm_end - vma->vm_start; + if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) { + WPRINTK("you _must_ map exactly %d pages!\n", + mmap_pages + RING_PAGES); + return -EAGAIN; + } + + size >>= PAGE_SHIFT; + info->rings_vstart = vma->vm_start; + info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); + + /* Map the ring pages to the start of the region and reserve it. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + ret = vm_insert_page(vma, vma->vm_start, + virt_to_page(info->ufe_ring.sring)); + else + ret = remap_pfn_range(vma, vma->vm_start, + __pa(info->ufe_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); + if (ret) { + WPRINTK("Mapping user ring failed!\n"); + goto fail; + } + + /* Mark this VM as containing foreign pages, and set up mappings. */ + info->foreign_map.map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) * + sizeof(*info->foreign_map.map), GFP_KERNEL); + if (info->foreign_map.map == NULL) { + WPRINTK("Couldn't alloc VM_FOREIGN map.\n"); + goto fail; + } + + vma->vm_private_data = &info->foreign_map; + vma->vm_flags |= VM_FOREIGN; + vma->vm_flags |= VM_DONTCOPY; + +#ifdef CONFIG_X86 + vma->vm_mm->context.has_foreign_mappings = 1; +#endif + + info->mm = get_task_mm(current); + smp_wmb(); + info->ring_ok = 1; + return 0; + fail: + /* Clear any active mappings. */ + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, NULL); + + return -ENOMEM; +} + + +static int blktap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + tap_blkif_t *info = filp->private_data; + + switch(cmd) { + case BLKTAP_IOCTL_KICK_FE: + { + /* There are fe messages to process. */ + return blktap_read_ufe_ring(info); + } + case BLKTAP_IOCTL_SETMODE: + { + if (info) { + if (BLKTAP_MODE_VALID(arg)) { + info->mode = arg; + /* XXX: may need to flush rings here. 
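+				 * Presumably because requests already
+				 * forwarded under the old mode would be
+				 * stranded on the user ring.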
*/ + DPRINTK("blktap: set mode to %lx\n", + arg); + return 0; + } + } + return 0; + } + case BLKTAP_IOCTL_PRINT_IDXS: + { + if (info) { + printk("User Rings: \n-----------\n"); + printk("UF: rsp_cons: %2d, req_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + info->ufe_ring.rsp_cons, + info->ufe_ring.req_prod_pvt, + info->ufe_ring.sring->req_prod, + info->ufe_ring.sring->rsp_prod); + } + return 0; + } + case BLKTAP_IOCTL_SENDPID: + { + if (info) { + info->pid = (pid_t)arg; + info->pid_ns = current->nsproxy->pid_ns; + DPRINTK("blktap: pid received %p:%d\n", + info->pid_ns, info->pid); + } + return 0; + } + case BLKTAP_IOCTL_NEWINTF: + { + uint64_t val = (uint64_t)arg; + domid_translate_t *tr = (domid_translate_t *)&val; + + DPRINTK("NEWINTF Req for domid %d and bus id %d\n", + tr->domid, tr->busid); + info = get_next_free_dev(); + if (!info) { + WPRINTK("Error initialising /dev/xen/blktap - " + "No more devices\n"); + return -1; + } + info->trans.domid = tr->domid; + info->trans.busid = tr->busid; + return info->minor; + } + case BLKTAP_IOCTL_NEWINTF_EXT: + { + void __user *udata = (void __user *) arg; + domid_translate_ext_t tr; + + if (copy_from_user(&tr, udata, sizeof(domid_translate_ext_t))) + return -EFAULT; + + DPRINTK("NEWINTF_EXT Req for domid %d and bus id %d\n", + tr.domid, tr.busid); + info = get_next_free_dev(); + if (!info) { + WPRINTK("Error initialising /dev/xen/blktap - " + "No more devices\n"); + return -1; + } + info->trans.domid = tr.domid; + info->trans.busid = tr.busid; + return info->minor; + } + case BLKTAP_IOCTL_FREEINTF: + { + unsigned long dev = arg; + unsigned long flags; + + info = tapfds[dev]; + + if ((dev > MAX_TAP_DEV) || !info) + return 0; /* should this be an error? */ + + spin_lock_irqsave(&pending_free_lock, flags); + if (info->dev_pending) + info->dev_pending = 0; + spin_unlock_irqrestore(&pending_free_lock, flags); + + return 0; + } + case BLKTAP_IOCTL_MINOR: + { + unsigned long dev = arg; + + info = tapfds[dev]; + + if ((dev > MAX_TAP_DEV) || !info) + return -EINVAL; + + return info->minor; + } + case BLKTAP_IOCTL_MAJOR: + return blktap_major; + + case BLKTAP_QUERY_ALLOC_REQS: + { + WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n", + alloc_pending_reqs, blkif_reqs); + return (alloc_pending_reqs/blkif_reqs) * 100; + } + } + return -ENOIOCTLCMD; +} + +static unsigned int blktap_poll(struct file *filp, poll_table *wait) +{ + tap_blkif_t *info = filp->private_data; + + /* do not work on the control device */ + if (!info) + return 0; + + poll_wait(filp, &info->wait, wait); + if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) { + RING_PUSH_REQUESTS(&info->ufe_ring); + return POLLIN | POLLRDNORM; + } + return 0; +} + +static void blktap_kick_user(int idx) +{ + tap_blkif_t *info; + + info = tapfds[idx]; + + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) + return; + + wake_up_interruptible(&info->wait); + + return; +} + +static int do_block_io_op(blkif_t *blkif); +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req); +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st); + +/****************************************************************** + * misc small helpers + */ +static int req_increase(void) +{ + int i, j; + + if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) + return -EINVAL; + + pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) + * blkif_reqs, GFP_KERNEL); + foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages); + + if 
(!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc]) + goto out_of_memory; + + DPRINTK("%s: reqs=%d, pages=%d\n", + __FUNCTION__, blkif_reqs, mmap_pages); + + for (i = 0; i < MAX_PENDING_REQS; i++) { + list_add_tail(&pending_reqs[mmap_alloc][i].free_list, + &pending_free); + pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc; + for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) + BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, + i, j)); + } + + mmap_alloc++; + DPRINTK("# MMAPs increased to %d\n",mmap_alloc); + return 0; + + out_of_memory: + free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages); + kfree(pending_reqs[mmap_alloc]); + WPRINTK("%s: out of memory\n", __FUNCTION__); + return -ENOMEM; +} + +static void mmap_req_del(int mmap) +{ + assert_spin_locked(&pending_free_lock); + + kfree(pending_reqs[mmap]); + pending_reqs[mmap] = NULL; + + free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages); + foreign_pages[mmap] = NULL; + + mmap_lock = 0; + DPRINTK("# MMAPs decreased to %d\n",mmap_alloc); + mmap_alloc--; +} + +static pending_req_t* alloc_req(void) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + + if (req) { + req->inuse = 1; + alloc_pending_reqs++; + } + spin_unlock_irqrestore(&pending_free_lock, flags); + + return req; +} + +static void free_req(pending_req_t *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + + alloc_pending_reqs--; + req->inuse = 0; + if (mmap_lock && (req->mem_idx == mmap_alloc-1)) { + mmap_inuse--; + if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1); + spin_unlock_irqrestore(&pending_free_lock, flags); + return; + } + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + + spin_unlock_irqrestore(&pending_free_lock, flags); + + if (was_empty) + wake_up(&pending_free_wq); +} + +static void blktap_zap_page_range(struct mm_struct *mm, + unsigned long uvaddr, int nr_pages) +{ + unsigned long end = uvaddr + (nr_pages << PAGE_SHIFT); + struct vm_area_struct *vma; + + vma = find_vma(mm, uvaddr); + while (vma && uvaddr < end) { + unsigned long s = max(uvaddr, vma->vm_start); + unsigned long e = min(end, vma->vm_end); + + zap_page_range(vma, s, e - s, NULL); + + uvaddr = e; + vma = vma->vm_next; + } +} + +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, + int tapidx) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int i, invcount = 0, locked = 0; + struct grant_handle_pair *khandle; + uint64_t ptep; + int ret, mmap_idx; + unsigned long kvaddr, uvaddr; + tap_blkif_t *info; + struct mm_struct *mm; + + + info = tapfds[tapidx]; + + if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) { + WPRINTK("fast_flush: Couldn't get info!\n"); + return; + } + + mm = info->mm; + + if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) { + down_write(&mm->mmap_sem); + blktap_zap_page_range(mm, + MMAP_VADDR(info->user_vstart, u_idx, 0), + req->nr_pages); + up_write(&mm->mmap_sem); + return; + } + + mmap_idx = req->mem_idx; + + for (i = 0; i < req->nr_pages; i++) { + kvaddr = idx_to_kaddr(mmap_idx, k_idx, i); + uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i); + + khandle = &pending_handle(mmap_idx, k_idx, i); + + if (khandle->kernel != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&unmap[invcount], + 
idx_to_kaddr(mmap_idx, k_idx, i), + GNTMAP_host_map, khandle->kernel); + invcount++; + + set_phys_to_machine( + __pa(idx_to_kaddr(mmap_idx, k_idx, i)) + >> PAGE_SHIFT, INVALID_P2M_ENTRY); + } + + if (khandle->user != INVALID_GRANT_HANDLE) { + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); + if (!locked++) + down_write(&mm->mmap_sem); + if (create_lookup_pte_addr( + mm, + MMAP_VADDR(info->user_vstart, u_idx, i), + &ptep) !=0) { + up_write(&mm->mmap_sem); + WPRINTK("Couldn't get a pte addr!\n"); + return; + } + + gnttab_set_unmap_op(&unmap[invcount], ptep, + GNTMAP_host_map + | GNTMAP_application_map + | GNTMAP_contains_pte, + khandle->user); + invcount++; + } + + BLKTAP_INVALIDATE_HANDLE(khandle); + } + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + + if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) { + if (!locked++) + down_write(&mm->mmap_sem); + blktap_zap_page_range(mm, + MMAP_VADDR(info->user_vstart, u_idx, 0), + req->nr_pages); + } + + if (locked) + up_write(&mm->mmap_sem); +} + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static void print_stats(blkif_t *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | pk %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_pk_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; + blkif->st_pk_req = 0; +} + +int tap_blkif_schedule(void *arg) +{ + blkif_t *blkif = arg; + + blkif_get(blkif); + + if (debug_lvl) + printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +/****************************************************************** + * COMPLETION CALLBACK -- Called by user level ioctl() + */ + +static int blktap_read_ufe_ring(tap_blkif_t *info) +{ + /* This is called to read responses from the UFE ring. */ + RING_IDX i, j, rp; + blkif_response_t *resp; + blkif_t *blkif=NULL; + int pending_idx, usr_idx, mmap_idx; + pending_req_t *pending_req; + + if (!info) + return 0; + + /* We currently only forward packets in INTERCEPT_FE mode. */ + if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE)) + return 0; + + /* for each outstanding message on the UFEring */ + rp = info->ufe_ring.sring->rsp_prod; + rmb(); + + for (i = info->ufe_ring.rsp_cons; i != rp; i++) { + blkif_response_t res; + resp = RING_GET_RESPONSE(&info->ufe_ring, i); + memcpy(&res, resp, sizeof(res)); + mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). 
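+			   * i.e. the memcpy() above must complete before
+			   * rsp_cons advances; once it does,
+			   * do_block_io_op() sees free ring space and may
+			   * reuse the slot for a new request.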
*/ + ++info->ufe_ring.rsp_cons; + + /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/ + usr_idx = (int)res.id; + pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); + mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); + + if ( (mmap_idx >= mmap_alloc) || + (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) ) + WPRINTK("Incorrect req map" + "[%d], internal map [%d,%d (%d)]\n", + usr_idx, mmap_idx, + ID_TO_IDX(info->idx_map[usr_idx]), + MASK_PEND_IDX( + ID_TO_IDX(info->idx_map[usr_idx]))); + + pending_req = &pending_reqs[mmap_idx][pending_idx]; + blkif = pending_req->blkif; + + for (j = 0; j < pending_req->nr_pages; j++) { + + unsigned long kvaddr, uvaddr; + struct page *pg; + int offset; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j); + + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + ClearPageReserved(pg); + offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT; + info->foreign_map.map[offset] = NULL; + } + fast_flush_area(pending_req, pending_idx, usr_idx, info->minor); + info->idx_map[usr_idx] = INVALID_REQ; + make_response(blkif, pending_req->id, res.operation, + res.status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } + + return 0; +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ + +static void blkif_notify_work(blkif_t *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + +irqreturn_t tap_blkif_be_int(int irq, void *dev_id) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ +static int print_dbug = 1; +static int do_block_io_op(blkif_t *blkif) +{ + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + blkif_request_t req; + pending_req_t *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + tap_blkif_t *info; + + rc = blk_rings->common.req_cons; + rp = blk_rings->common.sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. */ + + /*Check blkif has corresponding UE ring*/ + if (blkif->dev_num < 0) { + /*oops*/ + if (print_dbug) { + WPRINTK("Corresponding UE " + "ring does not exist!\n"); + print_dbug = 0; /*We only print this message once*/ + } + return 0; + } + + info = tapfds[blkif->dev_num]; + + if (blkif->dev_num > MAX_TAP_DEV || !info || + !test_bit(0, &info->dev_inuse)) { + if (print_dbug) { + WPRINTK("Can't get UE info!\n"); + print_dbug = 0; + } + return 0; + } + + while (rc != rp) { + + if (RING_FULL(&info->ufe_ring)) { + WPRINTK("RING_FULL! More to do\n"); + more_to_do = 1; + break; + } + + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) { + WPRINTK("RING_REQUEST_CONS_OVERFLOW!" 
+ " More to do\n"); + more_to_do = 1; + break; + } + + if (kthread_should_stop()) { + more_to_do = 1; + break; + } + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), + sizeof(req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); + break; + default: + BUG(); + } + blk_rings->common.req_cons = ++rc; /* before make_response() */ + + /* Apply all sanity checks to /private copy/ of request. */ + barrier(); + + switch (req.operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + + case BLKIF_OP_WRITE_BARRIER: + /* TODO Some counter? */ + /* Fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + + case BLKIF_OP_PACKET: + blkif->st_pk_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + + default: + /* A good sign something is wrong: sleep for a while to + * avoid excessive CPU consumption by a bad guest. */ + msleep(1); + WPRINTK("unknown operation [%d]\n", + req.operation); + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + + /* Yield point for this unbounded loop. */ + cond_resched(); + } + + blktap_kick_user(blkif->dev_num); + + return more_to_do; +} + +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req) +{ + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + int op, operation; + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int nseg; + int ret, i, nr_sects = 0; + tap_blkif_t *info; + blkif_request_t *target; + int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); + int usr_idx; + uint16_t mmap_idx = pending_req->mem_idx; + struct mm_struct *mm; + struct vm_area_struct *vma = NULL; + + switch (req->operation) { + case BLKIF_OP_PACKET: + /* Fall through */ + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } + + if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV) + goto fail_response; + + info = tapfds[blkif->dev_num]; + if (info == NULL) + goto fail_response; + + /* Check we have space on user ring - should never fail. */ + usr_idx = GET_NEXT_REQ(info->idx_map); + if (usr_idx == INVALID_REQ) { + BUG(); + goto fail_response; + } + + /* Check that number of segments is sane. */ + nseg = req->nr_segments; + if ( unlikely(nseg == 0) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) { + WPRINTK("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + /* Make sure userspace is ready. */ + if (!info->ring_ok) { + WPRINTK("blktap: ring not ready for requests!\n"); + goto fail_response; + } + smp_rmb(); + + if (RING_FULL(&info->ufe_ring)) { + WPRINTK("blktap: fe_ring is full, can't add " + "IO Request will be dropped. 
%d %d\n", + RING_SIZE(&info->ufe_ring), + RING_SIZE(&blkif->blk_rings.common)); + goto fail_response; + } + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + op = 0; + mm = info->mm; + if (!xen_feature(XENFEAT_auto_translated_physmap)) + down_write(&mm->mmap_sem); + for (i = 0; i < nseg; i++) { + unsigned long uvaddr; + unsigned long kvaddr; + uint64_t ptep; + uint32_t flags; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); + + flags = GNTMAP_host_map; + if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], kvaddr, flags, + req->seg[i].gref, blkif->domid); + op++; + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Now map it to user. */ + ret = create_lookup_pte_addr(mm, uvaddr, &ptep); + if (ret) { + up_write(&mm->mmap_sem); + WPRINTK("Couldn't get a pte addr!\n"); + goto fail_flush; + } + + flags = GNTMAP_host_map | GNTMAP_application_map + | GNTMAP_contains_pte; + if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[op], ptep, flags, + req->seg[i].gref, blkif->domid); + op++; + } + + nr_sects += (req->seg[i].last_sect - + req->seg[i].first_sect + 1); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op); + BUG_ON(ret); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + up_write(&mm->mmap_sem); + + for (i = 0; i < (nseg*2); i+=2) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + struct page *pg; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2); + + if (unlikely(map[i].status != 0)) { + WPRINTK("invalid kernel buffer -- " + "could not remap it\n"); + ret |= 1; + map[i].handle = INVALID_GRANT_HANDLE; + } + + if (unlikely(map[i+1].status != 0)) { + WPRINTK("invalid user buffer -- " + "could not remap it\n"); + ret |= 1; + map[i+1].handle = INVALID_GRANT_HANDLE; + } + + pending_handle(mmap_idx, pending_idx, i/2).kernel + = map[i].handle; + pending_handle(mmap_idx, pending_idx, i/2).user + = map[i+1].handle; + + if (ret) + continue; + + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr + >> PAGE_SHIFT)); + offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT; + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + info->foreign_map.map[offset] = pg; + } + } else { + for (i = 0; i < nseg; i++) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + struct page *pg; + + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); + + if (unlikely(map[i].status != 0)) { + WPRINTK("invalid kernel buffer -- " + "could not remap it\n"); + ret |= 1; + map[i].handle = INVALID_GRANT_HANDLE; + } + + pending_handle(mmap_idx, pending_idx, i).kernel + = map[i].handle; + + if (ret) + continue; + + offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT; + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + info->foreign_map.map[offset] = pg; + } + } + + if (ret) + goto fail_flush; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + down_write(&mm->mmap_sem); + /* Mark mapped pages as reserved: */ + for (i = 0; i < req->nr_segments; i++) { + unsigned long kvaddr; + struct page *pg; + + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); + pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + SetPageReserved(pg); + if (xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned 
long uvaddr = MMAP_VADDR(info->user_vstart, + usr_idx, i); + if (vma && uvaddr >= vma->vm_end) { + vma = vma->vm_next; + if (vma && + (uvaddr < vma->vm_start || + uvaddr >= vma->vm_end)) + vma = NULL; + } + if (vma == NULL) { + vma = find_vma(mm, uvaddr); + /* this virtual area was already munmapped. + so skip to next page */ + if (!vma) + continue; + } + ret = vm_insert_page(vma, uvaddr, pg); + if (ret) { + up_write(&mm->mmap_sem); + goto fail_flush; + } + } + } + if (xen_feature(XENFEAT_auto_translated_physmap)) + up_write(&mm->mmap_sem); + + /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ + info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx); + + blkif_get(blkif); + /* Finally, write the request message to the user ring. */ + target = RING_GET_REQUEST(&info->ufe_ring, + info->ufe_ring.req_prod_pvt); + memcpy(target, req, sizeof(*req)); + target->id = usr_idx; + wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */ + info->ufe_ring.req_prod_pvt++; + + if (operation == READ) + blkif->st_rd_sect += nr_sects; + else if (operation == WRITE) + blkif->st_wr_sect += nr_sects; + + return; + + fail_flush: + WPRINTK("Reached Fail_flush\n"); + fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num); + fail_response: + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); + msleep(1); /* back off a bit */ +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + + +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st) +{ + blkif_response_t resp; + unsigned long flags; + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + int more_to_do = 0; + int notify; + + resp.id = id; + resp.operation = op; + resp.status = st; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(RING_GET_RESPONSE(&blk_rings->native, + blk_rings->native.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_32: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, + blk_rings->x86_32.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_64: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, + blk_rings->x86_64.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + default: + BUG(); + } + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
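+		 * RING_FINAL_CHECK_FOR_REQUESTS() checks once more after
+		 * setting req_event, so a request that races with this
+		 * final check is still noticed.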
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static int __init blkif_init(void) +{ + int i, ret; + struct class *class; + + if (!is_running_on_xen()) + return -ENODEV; + + INIT_LIST_HEAD(&pending_free); + for(i = 0; i < 2; i++) { + ret = req_increase(); + if (ret) + break; + } + if (i == 0) + return ret; + + tap_blkif_interface_init(); + + alloc_pending_reqs = 0; + + tap_blkif_xenbus_init(); + + /* Dynamically allocate a major for this device */ + ret = register_chrdev(0, "blktap", &blktap_fops); + + if (ret < 0) { + WPRINTK("Couldn't register /dev/xen/blktap\n"); + return -ENOMEM; + } + + blktap_major = ret; + + /* tapfds[0] is always NULL */ + blktap_next_minor++; + + DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); + + /* Make sure the xen class exists */ + if ((class = get_xen_class()) != NULL) { + /* + * This will allow udev to create the blktap ctrl device. + * We only want to create blktap0 first. We don't want + * to flood the sysfs system with needless blktap devices. + * We only create the device when a request of a new device is + * made. + */ + device_create(class, NULL, MKDEV(blktap_major, 0), NULL, + "blktap0"); + } else { + /* this is bad, but not fatal */ + WPRINTK("blktap: sysfs xen_class not created\n"); + } + + DPRINTK("Blktap device successfully created\n"); + + return 0; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/blktap/xenbus.c +++ linux-ec2-2.6.31/drivers/xen/blktap/xenbus.c @@ -0,0 +1,509 @@ +/* drivers/xen/blktap/xenbus.c + * + * Xenbus code for blktap + * + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield + * + * Based on the blkback xenbus code: + * + * Copyright (C) 2005 Rusty Russell + * Copyright (C) 2005 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include "common.h" +#include "../core/domctl.h" + + +struct backend_info +{ + struct xenbus_device *dev; + blkif_t *blkif; + struct xenbus_watch backend_watch; + int xenbus_id; + int group_added; +}; + + +static void connect(struct backend_info *); +static int connect_ring(struct backend_info *); +static int blktap_remove(struct xenbus_device *dev); +static int blktap_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id); +static void tap_backend_changed(struct xenbus_watch *, const char **, + unsigned int); +static void tap_frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state); + +static int strsep_len(const char *str, char c, unsigned int len) +{ + unsigned int i; + + for (i = 0; str[i]; i++) + if (str[i] == c) { + if (len == 0) + return i; + len--; + } + return (len == 0) ? i : -ERANGE; +} + +static long get_id(const char *str) +{ + int len,end; + const char *ptr; + char *tptr, num[10]; + + len = strsep_len(str, '/', 2); + end = strlen(str); + if ( (len < 0) || (end < 0) ) return -1; + + ptr = str + len + 1; + strncpy(num,ptr,end - len); + tptr = num + (end - (len + 1)); + *tptr = '\0'; + DPRINTK("Get_id called for %s (%s)\n",str,num); + + return simple_strtol(num, NULL, 10); +} + +static int blktap_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + +/**************************************************************** + * sysfs interface for I/O requests of blktap device + */ + +#define VBD_SHOW(name, format, args...) 
\ + static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + struct backend_info *be = dev_get_drvdata(&dev->dev); \ + \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); + +static struct attribute *tapstat_attrs[] = { + &dev_attr_oo_req.attr, + &dev_attr_rd_req.attr, + &dev_attr_wr_req.attr, + &dev_attr_rd_sect.attr, + &dev_attr_wr_sect.attr, + NULL +}; + +static struct attribute_group tapstat_group = { + .name = "statistics", + .attrs = tapstat_attrs, +}; + +int xentap_sysfs_addif(struct xenbus_device *dev) +{ + int err; + struct backend_info *be = dev_get_drvdata(&dev->dev); + err = sysfs_create_group(&dev->dev.kobj, &tapstat_group); + if (!err) + be->group_added = 1; + return err; +} + +void xentap_sysfs_delif(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + sysfs_remove_group(&dev->dev.kobj, &tapstat_group); + be->group_added = 0; +} + +static int blktap_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + if (be->group_added) + xentap_sysfs_delif(be->dev); + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + if (be->blkif) { + if (be->blkif->xenblkd) + kthread_stop(be->blkif->xenblkd); + signal_tapdisk(be->blkif->dev_num); + tap_blkif_free(be->blkif); + tap_blkif_kmem_cache_free(be->blkif); + be->blkif = NULL; + } + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + return 0; +} + +static void tap_update_blkif_status(blkif_t *blkif) +{ + int err; + char name[TASK_COMM_LEN]; + + /* Not ready to connect? */ + if(!blkif->irq || !blkif->sectors) { + return; + } + + /* Already connected? */ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. */ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + err = blktap_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blktap dev name"); + return; + } + + if (!blkif->be->group_added) { + err = xentap_sysfs_addif(blkif->be->dev); + if (err) { + xenbus_dev_fatal(blkif->be->dev, err, + "creating sysfs entries"); + return; + } + } + + blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, name); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd"); + WPRINTK("Error starting thread\n"); + } +} + +/** + * Entry point to this code when a new device is created. Allocate + * the basic structures, and watch the store waiting for the + * user-space program to tell us the physical device info. Switch to + * InitWait. 
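+ * The device then stays in InitWait until tap_backend_changed() sees
+ * the "info" node written by the user-space daemon.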
+ */
+static int blktap_probe(struct xenbus_device *dev,
+			const struct xenbus_device_id *id)
+{
+	int err;
+	struct backend_info *be = kzalloc(sizeof(struct backend_info),
+					  GFP_KERNEL);
+	if (!be) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "allocating backend structure");
+		return -ENOMEM;
+	}
+
+	be->dev = dev;
+	dev_set_drvdata(&dev->dev, be);
+	be->xenbus_id = get_id(dev->nodename);
+
+	be->blkif = tap_alloc_blkif(dev->otherend_id);
+	if (IS_ERR(be->blkif)) {
+		err = PTR_ERR(be->blkif);
+		be->blkif = NULL;
+		xenbus_dev_fatal(dev, err, "creating block interface");
+		goto fail;
+	}
+
+	/* setup back pointer */
+	be->blkif->be = be;
+	be->blkif->sectors = 0;
+
+	/* set a watch on disk info, waiting for userspace to update details */
+	err = xenbus_watch_path2(dev, dev->nodename, "info",
+				 &be->backend_watch, tap_backend_changed);
+	if (err)
+		goto fail;
+
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err)
+		goto fail;
+	return 0;
+
+fail:
+	DPRINTK("blktap probe failed\n");
+	blktap_remove(dev);
+	return err;
+}
+
+
+/**
+ * Callback received when the user space code has placed the device
+ * information in xenstore.
+ */
+static void tap_backend_changed(struct xenbus_watch *watch,
+				const char **vec, unsigned int len)
+{
+	int err;
+	unsigned long info;
+	struct backend_info *be
+		= container_of(watch, struct backend_info, backend_watch);
+	struct xenbus_device *dev = be->dev;
+
+	/*
+	 * Check whether the user-space code has opened the image and
+	 * written the sector and disk info to xenstore.
+	 */
+	err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info,
+			    NULL);
+	if (XENBUS_EXIST_ERR(err))
+		return;
+	if (err) {
+		xenbus_dev_error(dev, err, "getting info");
+		return;
+	}
+
+	DPRINTK("Userspace update on disk info, %lu\n", info);
+
+	err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu",
+			    &be->blkif->sectors, NULL);
+
+	/* Associate tap dev with domid */
+	be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id,
+					  be->blkif);
+	DPRINTK("Thread started for domid [%d], connecting disk\n",
+		be->blkif->dev_num);
+
+	tap_update_blkif_status(be->blkif);
+}
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void tap_frontend_changed(struct xenbus_device *dev,
+				 enum xenbus_state frontend_state)
+{
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
+	int err;
+
+	DPRINTK("\n");
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:
+		if (dev->state == XenbusStateClosed) {
+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+			       __FUNCTION__, dev->nodename);
+			xenbus_switch_state(dev, XenbusStateInitWait);
+		}
+		break;
+
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		/* Ensure we connect even when two watches fire in
+		   close succession and we miss the intermediate value
+		   of frontend_state.
*/ + if (dev->state == XenbusStateConnected) + break; + + err = connect_ring(be); + if (err) + break; + tap_update_blkif_status(be->blkif); + break; + + case XenbusStateClosing: + if (be->blkif->xenblkd) { + kthread_stop(be->blkif->xenblkd); + be->blkif->xenblkd = NULL; + } + tap_blkif_free(be->blkif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/** + * Switch to Connected state. + */ +static void connect(struct backend_info *be) +{ + int err; + + struct xenbus_device *dev = be->dev; + struct xenbus_transaction xbt; + + /* Write feature-barrier to xenstore */ +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", "1"); + if (err) { + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + xenbus_transaction_end(xbt, 1); + return; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + + /* Switch state */ + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", + dev->nodename); + + return; +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + char protocol[64]; + int err; + + DPRINTK("%s\n", dev->otherend); + + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", + &ring_ref, "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); + if (err) { + strcpy(protocol, "unspecified"); + be->blkif->blk_protocol = xen_guest_blkif_protocol(be->blkif->domid); + } + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; +#if 1 /* maintain compatibility with early sles10-sp1 and paravirt netware betas */ + else if (0 == strcmp(protocol, "1")) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, "2")) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; +#endif + else { + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); + return -1; + } + printk(KERN_INFO + "blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); + + /* Map the shared frame, irq etc. 
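+	   By now the frontend has published the ring details, e.g.
+	   (illustrative values only): ring-ref = "8",
+	   event-channel = "10", protocol = "x86_64-abi".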
*/ + err = tap_blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + return err; + } + + return 0; +} + + +/* ** Driver Registration ** */ + + +static const struct xenbus_device_id blktap_ids[] = { + { "tap" }, + { "" } +}; + + +static struct xenbus_driver blktap = { + .name = "tap", + .ids = blktap_ids, + .probe = blktap_probe, + .remove = blktap_remove, + .otherend_changed = tap_frontend_changed +}; + + +void tap_blkif_xenbus_init(void) +{ + if (xenbus_register_backend(&blktap)) + BUG(); +} --- linux-ec2-2.6.31.orig/drivers/xen/blktap/Makefile +++ linux-ec2-2.6.31/drivers/xen/blktap/Makefile @@ -0,0 +1,5 @@ +LINUXINCLUDE += -I../xen/include/public/io + +obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o + +blktap-y := xenbus.o interface.o blocktap.o --- linux-ec2-2.6.31.orig/drivers/xen/blktap/common.h +++ linux-ec2-2.6.31/drivers/xen/blktap/common.h @@ -0,0 +1,123 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) + +struct backend_info; + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int irq; + /* Comms information. */ + enum blkif_protocol blk_protocol; + blkif_back_rings_t blk_rings; + struct vm_struct *blk_ring_area; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. 
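+	   blk_ring_lock serialises response production in make_response();
+	   refcnt pairs blkif_get()/blkif_put() and is drained through
+	   waiting_to_free.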
*/ + spinlock_t blk_ring_lock; + atomic_t refcnt; + + wait_queue_head_t wq; + struct task_struct *xenblkd; + unsigned int waiting_reqs; + struct request_queue *plug; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_pk_req; + int st_rd_sect; + int st_wr_sect; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; + + int dev_num; + uint64_t sectors; +} blkif_t; + +blkif_t *tap_alloc_blkif(domid_t domid); +void tap_blkif_free(blkif_t *blkif); +void tap_blkif_kmem_cache_free(blkif_t *blkif); +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, + unsigned int evtchn); +void tap_blkif_unmap(blkif_t *blkif); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +void tap_blkif_interface_init(void); + +void tap_blkif_xenbus_init(void); + +irqreturn_t tap_blkif_be_int(int irq, void *dev_id); +int tap_blkif_schedule(void *arg); + +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif); +void signal_tapdisk(int idx); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/wait_queue.c +++ linux-ec2-2.6.31/drivers/xen/blktap2/wait_queue.c @@ -0,0 +1,40 @@ +#include +#include + +#include "blktap.h" + +static LIST_HEAD(deferred_work_queue); +static DEFINE_SPINLOCK(deferred_work_lock); + +void +blktap_run_deferred(void) +{ + LIST_HEAD(queue); + struct blktap *tap; + unsigned long flags; + + spin_lock_irqsave(&deferred_work_lock, flags); + list_splice_init(&deferred_work_queue, &queue); + list_for_each_entry(tap, &queue, deferred_queue) + clear_bit(BLKTAP_DEFERRED, &tap->dev_inuse); + spin_unlock_irqrestore(&deferred_work_lock, flags); + + while (!list_empty(&queue)) { + tap = list_entry(queue.next, struct blktap, deferred_queue); + list_del_init(&tap->deferred_queue); + blktap_device_restart(tap); + } +} + +void +blktap_defer(struct blktap *tap) +{ + unsigned long flags; + + spin_lock_irqsave(&deferred_work_lock, flags); + if (!test_bit(BLKTAP_DEFERRED, &tap->dev_inuse)) { + set_bit(BLKTAP_DEFERRED, &tap->dev_inuse); + list_add_tail(&tap->deferred_queue, &deferred_work_queue); + } + spin_unlock_irqrestore(&deferred_work_lock, flags); +} --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/ring.c +++ linux-ec2-2.6.31/drivers/xen/blktap2/ring.c @@ -0,0 +1,612 @@ +#include +#include + +#include "blktap.h" + +static int blktap_ring_major; + +static inline struct blktap * +vma_to_blktap(struct vm_area_struct *vma) +{ + struct vm_foreign_map *m = vma->vm_private_data; + struct blktap_ring *r = container_of(m, struct blktap_ring, foreign_map); + return container_of(r, struct blktap, ring); +} + + /* + * BLKTAP - immediately before the mmap area, + * we have a bunch of pages reserved for shared memory rings. + */ +#define RING_PAGES 1 + +static int +blktap_read_ring(struct blktap *tap) +{ + /* This is called to read responses from the ring. 
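+	   It runs from the BLKTAP2_IOCTL_KICK_FE ioctl, after tapdisk
+	   has pushed its responses onto the shared ring.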
*/ + int usr_idx; + RING_IDX rc, rp; + blkif_response_t res; + struct blktap_ring *ring; + struct blktap_request *request; + + down_read(&tap->tap_sem); + + ring = &tap->ring; + if (!ring->vma) { + up_read(&tap->tap_sem); + return 0; + } + + /* for each outstanding message on the ring */ + rp = ring->ring.sring->rsp_prod; + rmb(); + + for (rc = ring->ring.rsp_cons; rc != rp; rc++) { + memcpy(&res, RING_GET_RESPONSE(&ring->ring, rc), sizeof(res)); + mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */ + ++ring->ring.rsp_cons; + + usr_idx = (int)res.id; + if (usr_idx >= MAX_PENDING_REQS || + !tap->pending_requests[usr_idx]) { + BTWARN("Request %d/%d invalid [%x], tapdisk %d%p\n", + rc, rp, usr_idx, tap->pid, ring->vma); + continue; + } + + request = tap->pending_requests[usr_idx]; + BTDBG("request %p response #%d id %x\n", request, rc, usr_idx); + blktap_device_finish_request(tap, &res, request); + } + + up_read(&tap->tap_sem); + + blktap_run_deferred(); + + return 0; +} + +static int +blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + /* + * if the page has not been mapped in by the driver then return + * VM_FAULT_SIGBUS to the domain. + */ + + return VM_FAULT_SIGBUS; +} + +static pte_t +blktap_ring_clear_pte(struct vm_area_struct *vma, + unsigned long uvaddr, + pte_t *ptep, int is_fullmm) +{ + pte_t copy; + struct blktap *tap; + unsigned long kvaddr; + struct page **map, *page; + struct blktap_ring *ring; + struct blktap_request *request; + struct grant_handle_pair *khandle; + struct gnttab_unmap_grant_ref unmap[2]; + int offset, seg, usr_idx, count = 0; + + tap = vma_to_blktap(vma); + ring = &tap->ring; + map = ring->foreign_map.map; + BUG_ON(!map); /* TODO Should this be changed to if statement? */ + + /* + * Zap entry if the address is before the start of the grant + * mapped region. 
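+	 * Addresses below user_vstart cover the shared ring page rather
+	 * than a request segment, so there is no grant handle to unmap.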
+ */ + if (uvaddr < ring->user_vstart) + return xen_ptep_get_and_clear_full(vma, uvaddr, + ptep, is_fullmm); + + offset = (int)((uvaddr - ring->user_vstart) >> PAGE_SHIFT); + usr_idx = offset / BLKIF_MAX_SEGMENTS_PER_REQUEST; + seg = offset % BLKIF_MAX_SEGMENTS_PER_REQUEST; + + offset = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT); + page = map[offset]; + if (page) { + ClearPageReserved(page); + if (PageBlkback(page)) { + ClearPageBlkback(page); + set_page_private(page, 0); + } + } + map[offset] = NULL; + + request = tap->pending_requests[usr_idx]; + kvaddr = request_to_kaddr(request, seg); + khandle = request->handles + seg; + + if (khandle->kernel != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&unmap[count], kvaddr, + GNTMAP_host_map, khandle->kernel); + count++; + + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + INVALID_P2M_ENTRY); + } + + + if (khandle->user != INVALID_GRANT_HANDLE) { + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); + + copy = *ptep; + gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep), + GNTMAP_host_map + | GNTMAP_application_map + | GNTMAP_contains_pte, + khandle->user); + count++; + } else + copy = xen_ptep_get_and_clear_full(vma, uvaddr, ptep, + is_fullmm); + + if (count) + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, count)) + BUG(); + + khandle->kernel = INVALID_GRANT_HANDLE; + khandle->user = INVALID_GRANT_HANDLE; + + return copy; +} + +static void +blktap_ring_vm_unmap(struct vm_area_struct *vma) +{ + struct blktap *tap = vma_to_blktap(vma); + + down_write(&tap->tap_sem); + clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse); + clear_bit(BLKTAP_PAUSED, &tap->dev_inuse); + clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse); + up_write(&tap->tap_sem); +} + +static void +blktap_ring_vm_close(struct vm_area_struct *vma) +{ + struct blktap *tap = vma_to_blktap(vma); + struct blktap_ring *ring = &tap->ring; + + blktap_ring_vm_unmap(vma); /* fail future requests */ + blktap_device_fail_pending_requests(tap); /* fail pending requests */ + blktap_device_restart(tap); /* fail deferred requests */ + + down_write(&tap->tap_sem); + + zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); + + kfree(ring->foreign_map.map); + ring->foreign_map.map = NULL; + + /* Free the ring page. 
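+	   It was marked reserved in blktap_ring_mmap(), so clear the
+	   flag before handing it back to the allocator.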
*/ + ClearPageReserved(virt_to_page(ring->ring.sring)); + free_page((unsigned long)ring->ring.sring); + + BTINFO("unmapping ring %d\n", tap->minor); + ring->ring.sring = NULL; + ring->vma = NULL; + + up_write(&tap->tap_sem); + + wake_up(&tap->wq); +} + +static struct vm_operations_struct blktap_ring_vm_operations = { + .close = blktap_ring_vm_close, + .unmap = blktap_ring_vm_unmap, + .fault = blktap_ring_fault, + .zap_pte = blktap_ring_clear_pte, +}; + +static int +blktap_ring_open(struct inode *inode, struct file *filp) +{ + int idx; + struct blktap *tap; + + idx = iminor(inode); + if (idx < 0 || idx > MAX_BLKTAP_DEVICE || blktaps[idx] == NULL) { + BTERR("unable to open device blktap%d\n", idx); + return -ENODEV; + } + + tap = blktaps[idx]; + + BTINFO("opening device blktap%d\n", idx); + + if (!test_bit(BLKTAP_CONTROL, &tap->dev_inuse)) + return -ENODEV; + + /* Only one process can access ring at a time */ + if (test_and_set_bit(BLKTAP_RING_FD, &tap->dev_inuse)) + return -EBUSY; + + filp->private_data = tap; + BTINFO("opened device %d\n", tap->minor); + + return 0; +} + +static int +blktap_ring_release(struct inode *inode, struct file *filp) +{ + struct blktap *tap = filp->private_data; + + BTINFO("freeing device %d\n", tap->minor); + clear_bit(BLKTAP_RING_FD, &tap->dev_inuse); + filp->private_data = NULL; + wake_up(&tap->wq); + return 0; +} + +/* Note on mmap: + * We need to map pages to user space in a way that will allow the block + * subsystem set up direct IO to them. This couldn't be done before, because + * there isn't really a sane way to translate a user virtual address down to a + * physical address when the page belongs to another domain. + * + * My first approach was to map the page in to kernel memory, add an entry + * for it in the physical frame list (using alloc_lomem_region as in blkback) + * and then attempt to map that page up to user space. This is disallowed + * by xen though, which realizes that we don't really own the machine frame + * underlying the physical page. + * + * The new approach is to provide explicit support for this in xen linux. + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages + * mapped from other vms. vma->vm_private_data is set up as a mapping + * from pages to actual page structs. There is a new clause in get_user_pages + * that does the right thing for this sort of mapping. + */ +static int +blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int size, err; + struct page **map; + struct blktap *tap; + blkif_sring_t *sring; + struct blktap_ring *ring; + + tap = filp->private_data; + ring = &tap->ring; + map = NULL; + sring = NULL; + + if (!tap || test_and_set_bit(BLKTAP_RING_VMA, &tap->dev_inuse)) + return -ENOMEM; + + size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + if (size != (MMAP_PAGES + RING_PAGES)) { + BTERR("you _must_ map exactly %lu pages!\n", + MMAP_PAGES + RING_PAGES); + return -EAGAIN; + } + + /* Allocate the fe ring. */ + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (!sring) { + BTERR("Couldn't alloc sring.\n"); + goto fail_mem; + } + + map = kzalloc(size * sizeof(struct page *), GFP_KERNEL); + if (!map) { + BTERR("Couldn't alloc VM_FOREIGN map.\n"); + goto fail_mem; + } + + SetPageReserved(virt_to_page(sring)); + + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE); + + ring->ring_vstart = vma->vm_start; + ring->user_vstart = ring->ring_vstart + (RING_PAGES << PAGE_SHIFT); + + /* Map the ring pages to the start of the region and reserve it. 
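+	   On auto-translated guests the ring page can be inserted
+	   directly with vm_insert_page(); PV guests remap by pfn instead.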
*/ + if (xen_feature(XENFEAT_auto_translated_physmap)) + err = vm_insert_page(vma, vma->vm_start, + virt_to_page(ring->ring.sring)); + else + err = remap_pfn_range(vma, vma->vm_start, + __pa(ring->ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); + if (err) { + BTERR("Mapping user ring failed: %d\n", err); + goto fail; + } + + /* Mark this VM as containing foreign pages, and set up mappings. */ + ring->foreign_map.map = map; + vma->vm_private_data = &ring->foreign_map; + vma->vm_flags |= VM_FOREIGN; + vma->vm_flags |= VM_DONTCOPY; + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &blktap_ring_vm_operations; + +#ifdef CONFIG_X86 + vma->vm_mm->context.has_foreign_mappings = 1; +#endif + + tap->pid = current->pid; + BTINFO("blktap: mapping pid is %d\n", tap->pid); + + ring->vma = vma; + return 0; + + fail: + /* Clear any active mappings. */ + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, NULL); + ClearPageReserved(virt_to_page(sring)); + fail_mem: + free_page((unsigned long)sring); + kfree(map); + + return -ENOMEM; +} + +static inline void +blktap_ring_set_message(struct blktap *tap, int msg) +{ + struct blktap_ring *ring = &tap->ring; + + down_read(&tap->tap_sem); + if (ring->ring.sring) + ring->ring.sring->pad[0] = msg; + up_read(&tap->tap_sem); +} + +static int +blktap_ring_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct blktap_params params; + struct blktap *tap = filp->private_data; + + BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg); + + switch(cmd) { + case BLKTAP2_IOCTL_KICK_FE: + /* There are fe messages to process. */ + return blktap_read_ring(tap); + + case BLKTAP2_IOCTL_CREATE_DEVICE: + if (!arg) + return -EINVAL; + + if (copy_from_user(¶ms, (struct blktap_params __user *)arg, + sizeof(params))) { + BTERR("failed to get params\n"); + return -EFAULT; + } + + if (blktap_validate_params(tap, ¶ms)) { + BTERR("invalid params\n"); + return -EINVAL; + } + + tap->params = params; + return blktap_device_create(tap); + + case BLKTAP2_IOCTL_SET_PARAMS: + if (!arg) + return -EINVAL; + + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return -EINVAL; + + if (copy_from_user(¶ms, (struct blktap_params __user *)arg, + sizeof(params))) { + BTERR("failed to get params\n"); + return -EFAULT; + } + + if (blktap_validate_params(tap, ¶ms)) { + BTERR("invalid params\n"); + return -EINVAL; + } + + tap->params = params; + return 0; + + case BLKTAP2_IOCTL_PAUSE: + if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) + return -EINVAL; + + set_bit(BLKTAP_PAUSED, &tap->dev_inuse); + clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse); + + blktap_ring_set_message(tap, 0); + wake_up_interruptible(&tap->wq); + + return 0; + + + case BLKTAP2_IOCTL_REOPEN: + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return -EINVAL; + + if (!arg) + return -EINVAL; + + if (copy_to_user((char __user *)arg, + tap->params.name, + strlen(tap->params.name) + 1)) + return -EFAULT; + + blktap_ring_set_message(tap, 0); + wake_up_interruptible(&tap->wq); + + return 0; + + case BLKTAP2_IOCTL_RESUME: + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return -EINVAL; + + tap->ring.response = (int)arg; + if (!tap->ring.response) + clear_bit(BLKTAP_PAUSED, &tap->dev_inuse); + + blktap_ring_set_message(tap, 0); + wake_up_interruptible(&tap->wq); + + return 0; + } + + return -ENOIOCTLCMD; +} + +static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait) +{ + struct blktap *tap = filp->private_data; + struct blktap_ring *ring = &tap->ring; + + 
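+	/*
+	 * Userspace pairs this poll() with BLKTAP2_IOCTL_KICK_FE.  A
+	 * minimal, purely illustrative tapdisk loop (ring_fd is an
+	 * already-open ring device fd, assumed here) might be:
+	 *
+	 *	struct pollfd pfd = { .fd = ring_fd, .events = POLLIN };
+	 *	while (poll(&pfd, 1, -1) > 0) {
+	 *		... service requests, push responses ...
+	 *		ioctl(ring_fd, BLKTAP2_IOCTL_KICK_FE);
+	 *	}
+	 */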
poll_wait(filp, &ring->poll_wait, wait); + if (ring->ring.sring->pad[0] != 0 || + ring->ring.req_prod_pvt != ring->ring.sring->req_prod) { + RING_PUSH_REQUESTS(&ring->ring); + return POLLIN | POLLRDNORM; + } + + return 0; +} + +static struct file_operations blktap_ring_file_operations = { + .owner = THIS_MODULE, + .open = blktap_ring_open, + .release = blktap_ring_release, + .ioctl = blktap_ring_ioctl, + .mmap = blktap_ring_mmap, + .poll = blktap_ring_poll, +}; + +void +blktap_ring_kick_user(struct blktap *tap) +{ + wake_up_interruptible(&tap->ring.poll_wait); +} + +int +blktap_ring_resume(struct blktap *tap) +{ + int err; + struct blktap_ring *ring = &tap->ring; + + if (!blktap_active(tap)) + return -ENODEV; + + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return -EINVAL; + + /* set shared flag for resume */ + ring->response = 0; + + blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_RESUME); + blktap_ring_kick_user(tap); + + wait_event_interruptible(tap->wq, ring->response || + !test_bit(BLKTAP_PAUSED, &tap->dev_inuse)); + + err = ring->response; + ring->response = 0; + + BTDBG("err: %d\n", err); + + if (err) + return err; + + if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return -EAGAIN; + + return 0; +} + +int +blktap_ring_pause(struct blktap *tap) +{ + if (!blktap_active(tap)) + return -ENODEV; + + if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) + return -EINVAL; + + BTDBG("draining queue\n"); + wait_event_interruptible(tap->wq, !tap->pending_cnt); + if (tap->pending_cnt) + return -EAGAIN; + + blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_PAUSE); + blktap_ring_kick_user(tap); + + BTDBG("waiting for tapdisk response\n"); + wait_event_interruptible(tap->wq, test_bit(BLKTAP_PAUSED, &tap->dev_inuse)); + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return -EAGAIN; + + return 0; +} + +int +blktap_ring_destroy(struct blktap *tap) +{ + if (!test_bit(BLKTAP_RING_FD, &tap->dev_inuse) && + !test_bit(BLKTAP_RING_VMA, &tap->dev_inuse)) + return 0; + + BTDBG("sending tapdisk close message\n"); + blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_CLOSE); + blktap_ring_kick_user(tap); + + return -EAGAIN; +} + +static void +blktap_ring_initialize(struct blktap_ring *ring, int minor) +{ + memset(ring, 0, sizeof(*ring)); + init_waitqueue_head(&ring->poll_wait); + ring->devno = MKDEV(blktap_ring_major, minor); +} + +int +blktap_ring_create(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + blktap_ring_initialize(ring, tap->minor); + return blktap_sysfs_create(tap); +} + +int +blktap_ring_init(int *major) +{ + int err; + + err = register_chrdev(0, "blktap2", &blktap_ring_file_operations); + if (err < 0) { + BTERR("error registering blktap ring device: %d\n", err); + return err; + } + + blktap_ring_major = *major = err; + BTINFO("blktap ring major: %d\n", blktap_ring_major); + return 0; +} + +int +blktap_ring_free(void) +{ + if (blktap_ring_major) + unregister_chrdev(blktap_ring_major, "blktap2"); + + return 0; +} --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/blktap.h +++ linux-ec2-2.6.31/drivers/xen/blktap2/blktap.h @@ -0,0 +1,246 @@ +#ifndef _BLKTAP_H_ +#define _BLKTAP_H_ + +#include +#include +#include +#include +#include +#include + +//#define ENABLE_PASSTHROUGH + +extern int blktap_debug_level; + +#define BTPRINTK(level, tag, force, _f, _a...) \ + do { \ + if (blktap_debug_level > level && \ + (force || printk_ratelimit())) \ + printk(tag "%s: " _f, __func__, ##_a); \ + } while (0) + +#define BTDBG(_f, _a...) 
BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a) +#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a) +#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a) +#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a) + +#define MAX_BLKTAP_DEVICE 256 + +#define BLKTAP_CONTROL 1 +#define BLKTAP_RING_FD 2 +#define BLKTAP_RING_VMA 3 +#define BLKTAP_DEVICE 4 +#define BLKTAP_SYSFS 5 +#define BLKTAP_PAUSE_REQUESTED 6 +#define BLKTAP_PAUSED 7 +#define BLKTAP_SHUTDOWN_REQUESTED 8 +#define BLKTAP_PASSTHROUGH 9 +#define BLKTAP_DEFERRED 10 + +/* blktap IOCTLs: */ +#define BLKTAP2_IOCTL_KICK_FE 1 +#define BLKTAP2_IOCTL_ALLOC_TAP 200 +#define BLKTAP2_IOCTL_FREE_TAP 201 +#define BLKTAP2_IOCTL_CREATE_DEVICE 202 +#define BLKTAP2_IOCTL_SET_PARAMS 203 +#define BLKTAP2_IOCTL_PAUSE 204 +#define BLKTAP2_IOCTL_REOPEN 205 +#define BLKTAP2_IOCTL_RESUME 206 + +#define BLKTAP2_MAX_MESSAGE_LEN 256 + +#define BLKTAP2_RING_MESSAGE_PAUSE 1 +#define BLKTAP2_RING_MESSAGE_RESUME 2 +#define BLKTAP2_RING_MESSAGE_CLOSE 3 + +#define BLKTAP_REQUEST_FREE 0 +#define BLKTAP_REQUEST_PENDING 1 + +/* + * The maximum number of requests that can be outstanding at any time + * is determined by + * + * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] + * + * where mmap_alloc < MAX_DYNAMIC_MEM. + * + * TODO: + * mmap_alloc is initialised to 2 and should be adjustable on the fly via + * sysfs. + */ +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) +#define MAX_DYNAMIC_MEM BLK_RING_SIZE +#define MAX_PENDING_REQS BLK_RING_SIZE +#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) +#define MMAP_VADDR(_start, _req, _seg) \ + (_start + \ + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) + +#define blktap_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blktap_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->wq); \ + } while (0) + +struct blktap; + +struct grant_handle_pair { + grant_handle_t kernel; + grant_handle_t user; +}; +#define INVALID_GRANT_HANDLE 0xFFFF + +struct blktap_handle { + unsigned int ring; + unsigned int device; + unsigned int minor; +}; + +struct blktap_params { + char name[BLKTAP2_MAX_MESSAGE_LEN]; + unsigned long long capacity; + unsigned long sector_size; +}; + +struct blktap_device { + int users; + spinlock_t lock; + struct gendisk *gd; + +#ifdef ENABLE_PASSTHROUGH + struct block_device *bdev; +#endif +}; + +struct blktap_ring { + struct vm_area_struct *vma; + blkif_front_ring_t ring; + struct vm_foreign_map foreign_map; + unsigned long ring_vstart; + unsigned long user_vstart; + + int response; + + wait_queue_head_t poll_wait; + + dev_t devno; + struct device *dev; + atomic_t sysfs_refcnt; + struct mutex sysfs_mutex; +}; + +struct blktap_statistics { + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_rd_sect; + int st_wr_sect; + s64 st_rd_cnt; + s64 st_rd_sum_usecs; + s64 st_rd_max_usecs; + s64 st_wr_cnt; + s64 st_wr_sum_usecs; + s64 st_wr_max_usecs; +}; + +struct blktap_request { + uint64_t id; + uint16_t usr_idx; + + uint8_t status; + atomic_t pendcnt; + uint8_t nr_pages; + unsigned short operation; + + struct timeval time; + struct grant_handle_pair handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct list_head free_list; +}; + +struct blktap { + int minor; + pid_t pid; + atomic_t refcnt; + unsigned long dev_inuse; + + struct blktap_params params; + + struct rw_semaphore tap_sem; + + struct blktap_ring ring; + struct blktap_device device; + + int pending_cnt; + 
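+	/* in-flight requests, indexed by the usr_idx of each response */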
struct blktap_request *pending_requests[MAX_PENDING_REQS]; + struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + + wait_queue_head_t wq; + struct list_head deferred_queue; + + struct blktap_statistics stats; +}; + +extern struct blktap *blktaps[MAX_BLKTAP_DEVICE]; + +static inline int +blktap_active(struct blktap *tap) +{ + return test_bit(BLKTAP_RING_VMA, &tap->dev_inuse); +} + +static inline int +blktap_validate_params(struct blktap *tap, struct blktap_params *params) +{ + /* TODO: sanity check */ + params->name[sizeof(params->name) - 1] = '\0'; + BTINFO("%s: capacity: %llu, sector-size: %lu\n", + params->name, params->capacity, params->sector_size); + return 0; +} + +int blktap_control_destroy_device(struct blktap *); + +int blktap_ring_init(int *); +int blktap_ring_free(void); +int blktap_ring_create(struct blktap *); +int blktap_ring_destroy(struct blktap *); +int blktap_ring_pause(struct blktap *); +int blktap_ring_resume(struct blktap *); +void blktap_ring_kick_user(struct blktap *); + +int blktap_sysfs_init(void); +void blktap_sysfs_free(void); +int blktap_sysfs_create(struct blktap *); +int blktap_sysfs_destroy(struct blktap *); + +int blktap_device_init(int *); +void blktap_device_free(void); +int blktap_device_create(struct blktap *); +int blktap_device_destroy(struct blktap *); +int blktap_device_pause(struct blktap *); +int blktap_device_resume(struct blktap *); +void blktap_device_restart(struct blktap *); +void blktap_device_finish_request(struct blktap *, + blkif_response_t *, + struct blktap_request *); +void blktap_device_fail_pending_requests(struct blktap *); +#ifdef ENABLE_PASSTHROUGH +int blktap_device_enable_passthrough(struct blktap *, + unsigned, unsigned); +#endif + +void blktap_defer(struct blktap *); +void blktap_run_deferred(void); + +int blktap_request_pool_init(void); +void blktap_request_pool_free(void); +int blktap_request_pool_grow(void); +int blktap_request_pool_shrink(void); +struct blktap_request *blktap_request_allocate(struct blktap *); +void blktap_request_free(struct blktap *, struct blktap_request *); +unsigned long request_to_kaddr(struct blktap_request *, int); + +#endif --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/device.c +++ linux-ec2-2.6.31/drivers/xen/blktap2/device.c @@ -0,0 +1,1197 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "blktap.h" + +#include "../blkback/blkback-pagemap.h" + +#if 0 +#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a) +#else +#define DPRINTK_IOCTL(_f, _a...) 
((void)0) +#endif + +struct blktap_grant_table { + int cnt; + struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2]; +}; + +static int blktap_device_major; + +static inline struct blktap * +dev_to_blktap(struct blktap_device *dev) +{ + return container_of(dev, struct blktap, device); +} + +static int +blktap_device_open(struct block_device *bd, fmode_t mode) +{ + struct blktap *tap; + struct blktap_device *dev = bd->bd_disk->private_data; + + if (!dev) + return -ENOENT; + + tap = dev_to_blktap(dev); + if (!blktap_active(tap) || + test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + return -ENOENT; + + dev->users++; + + return 0; +} + +static int +blktap_device_release(struct gendisk *disk, fmode_t mode) +{ + struct blktap_device *dev = disk->private_data; + struct blktap *tap = dev_to_blktap(dev); + + dev->users--; + if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + blktap_device_destroy(tap); + + return 0; +} + +static int +blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg) +{ + /* We don't have real geometry info, but let's at least return + values consistent with the size of the device */ + sector_t nsect = get_capacity(bd->bd_disk); + sector_t cylinders = nsect; + + hg->heads = 0xff; + hg->sectors = 0x3f; + sector_div(cylinders, hg->heads * hg->sectors); + hg->cylinders = cylinders; + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) + hg->cylinders = 0xffff; + return 0; +} + +static int +blktap_device_ioctl(struct block_device *bd, fmode_t mode, + unsigned command, unsigned long argument) +{ + int i; + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx\n", + command, (long)argument); + + switch (command) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + case HDIO_GETGEO: { + struct hd_geometry geo; + int ret; + + if (!argument) + return -EINVAL; + + geo.start = get_start_sect(bd); + ret = blktap_device_getgeo(bd, &geo); + if (ret) + return ret; + + if (copy_to_user((struct hd_geometry __user *)argument, &geo, + sizeof(geo))) + return -EFAULT; + + return 0; + } +#endif + case CDROMMULTISESSION: + BTDBG("FIXME: support multisession CDs later\n"); + for (i = 0; i < sizeof(struct cdrom_multisession); i++) + if (put_user(0, (char __user *)(argument + i))) + return -EFAULT; + return 0; + + case SCSI_IOCTL_GET_IDLUN: + if (!access_ok(VERIFY_WRITE, argument, + sizeof(struct scsi_idlun))) + return -EFAULT; + + /* return 0 for now. */ + __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id); + __put_user(0, + &((struct scsi_idlun __user *)argument)->host_unique_id); + return 0; + + default: + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", + command);*/ + return -EINVAL; /* same return as native Linux */ + } + + return 0; +} + +static struct block_device_operations blktap_device_file_operations = { + .owner = THIS_MODULE, + .open = blktap_device_open, + .release = blktap_device_release, + .ioctl = blktap_device_ioctl, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) + .getgeo = blktap_device_getgeo +#endif +}; + +static int +blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page, + unsigned long addr, void *data) +{ + pte_t *pte = (pte_t *)data; + + BTDBG("ptep %p -> %012llx\n", ptep, (unsigned long long)pte_val(*pte)); + set_pte(ptep, *pte); + return 0; +} + +static int +blktap_map_uaddr(struct vm_area_struct *vma, unsigned long address, pte_t pte) +{ + return apply_to_page_range(vma ? 
vma->vm_mm : NULL, address, + PAGE_SIZE, blktap_map_uaddr_fn, &pte); +} + +static int +blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page, + unsigned long addr, void *data) +{ + struct vm_area_struct *vma = data; + + BTDBG("ptep %p\n", ptep); + xen_ptep_get_and_clear_full(vma, addr, ptep, 1); + return 0; +} + +static int +blktap_umap_uaddr(struct vm_area_struct *vma, unsigned long address) +{ + struct mm_struct *mm = NULL; + + if (!vma) { +#ifdef CONFIG_X86 + if (HYPERVISOR_update_va_mapping(address, __pte(0), + UVMF_INVLPG|UVMF_ALL)) + BUG(); + return 1; +#endif + } else + mm = vma->vm_mm; + return apply_to_page_range(mm, address, + PAGE_SIZE, blktap_umap_uaddr_fn, vma); +} + +static inline void +flush_tlb_kernel_page(unsigned long kvaddr) +{ +#ifdef CONFIG_X86 + xen_invlpg_all(kvaddr); +#else + flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE); +#endif +} + +static void +blktap_device_end_dequeued_request(struct blktap_device *dev, + struct request *req, int uptodate, int ret) +{ + spin_lock_irq(&dev->lock); + ret = __blk_end_request(req, ret, blk_rq_bytes(req)); + spin_unlock_irq(&dev->lock); + BUG_ON(ret); +} + +/* + * tap->tap_sem held on entry + */ +static void +blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request) +{ + uint64_t ptep; + int ret, usr_idx; + unsigned int i, cnt; + struct page **map, *page; + struct blktap_ring *ring; + struct grant_handle_pair *khandle; + unsigned long kvaddr, uvaddr, offset; + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2]; + grant_handle_t self_gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int self_gref_nr = 0; + + cnt = 0; + ring = &tap->ring; + usr_idx = request->usr_idx; + map = ring->foreign_map.map; + + if (!ring->vma) + return; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + zap_page_range(ring->vma, + MMAP_VADDR(ring->user_vstart, usr_idx, 0), + request->nr_pages << PAGE_SHIFT, NULL); + + for (i = 0; i < request->nr_pages; i++) { + kvaddr = request_to_kaddr(request, i); + uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i); + + khandle = request->handles + i; + + if (khandle->kernel != INVALID_GRANT_HANDLE) { + gnttab_set_unmap_op(&unmap[cnt], kvaddr, + GNTMAP_host_map, khandle->kernel); + cnt++; + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + INVALID_P2M_ENTRY); + } + + if (khandle->user != INVALID_GRANT_HANDLE) { + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); + if (create_lookup_pte_addr(ring->vma->vm_mm, + uvaddr, &ptep) != 0) { + BTERR("Couldn't get a pte addr!\n"); + return; + } + + gnttab_set_unmap_op(&unmap[cnt], ptep, + GNTMAP_host_map + | GNTMAP_application_map + | GNTMAP_contains_pte, + khandle->user); + cnt++; + } + + offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT; + + BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, " + "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: " + "0x%08lx, handle: %u\n", offset, map[offset], request, + usr_idx, i, kvaddr, khandle->kernel, uvaddr, + khandle->user); + + page = map[offset]; + if (page) { + ClearPageReserved(map[offset]); + if (PageBlkback(page)) { + ClearPageBlkback(page); + set_page_private(page, 0); + } else if ( + xen_feature(XENFEAT_auto_translated_physmap)) { + self_gref[self_gref_nr] = khandle->kernel; + self_gref_nr++; + } + } + map[offset] = NULL; + + khandle->kernel = INVALID_GRANT_HANDLE; + khandle->user = INVALID_GRANT_HANDLE; + } + + if (cnt) { + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, cnt); + BUG_ON(ret); + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) + 
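+		/* PV: tear down the user mappings now that the grants
+		   are unmapped; auto-translated guests instead end
+		   foreign access on their self-granted refs below. */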
zap_page_range(ring->vma, + MMAP_VADDR(ring->user_vstart, usr_idx, 0), + request->nr_pages << PAGE_SHIFT, NULL); + else { + for (i = 0; i < self_gref_nr; i++) { + gnttab_end_foreign_access_ref(self_gref[i]); + } + } +} + +/* + * tap->tap_sem held on entry + */ +static void +blktap_unmap(struct blktap *tap, struct blktap_request *request) +{ + int i, usr_idx; + unsigned long kvaddr; + + usr_idx = request->usr_idx; + down_write(&tap->ring.vma->vm_mm->mmap_sem); + + for (i = 0; i < request->nr_pages; i++) { + BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, " + "uvaddr: 0x%08lx, uhandle: %u\n", request, i, + request_to_kaddr(request, i), + request->handles[i].kernel, + MMAP_VADDR(tap->ring.user_vstart, usr_idx, i), + request->handles[i].user); + + if (!xen_feature(XENFEAT_auto_translated_physmap) && + request->handles[i].kernel == INVALID_GRANT_HANDLE) { + kvaddr = request_to_kaddr(request, i); + if (blktap_umap_uaddr(NULL, kvaddr) == 0) + flush_tlb_kernel_page(kvaddr); + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + INVALID_P2M_ENTRY); + } + } + + blktap_device_fast_flush(tap, request); + up_write(&tap->ring.vma->vm_mm->mmap_sem); +} + +/* + * called if the tapdisk process dies unexpectedly. + * fail and release any pending requests and disable queue. + */ +void +blktap_device_fail_pending_requests(struct blktap *tap) +{ + int usr_idx; + struct request *req; + struct blktap_device *dev; + struct blktap_request *request; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + return; + + down_write(&tap->tap_sem); + + dev = &tap->device; + for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { + request = tap->pending_requests[usr_idx]; + if (!request || request->status != BLKTAP_REQUEST_PENDING) + continue; + + BTERR("%u:%u: failing pending %s of %d pages\n", + blktap_device_major, tap->minor, + (request->operation == BLKIF_OP_READ ? + "read" : "write"), request->nr_pages); + + blktap_unmap(tap, request); + req = (struct request *)(unsigned long)request->id; + blktap_device_end_dequeued_request(dev, req, 0, -ENODEV); + blktap_request_free(tap, request); + } + + up_write(&tap->tap_sem); + + spin_lock_irq(&dev->lock); + + /* fail any future requests */ + dev->gd->queue->queuedata = NULL; + blk_start_queue(dev->gd->queue); + + spin_unlock_irq(&dev->lock); +} + +/* + * tap->tap_sem held on entry + */ +void +blktap_device_finish_request(struct blktap *tap, + blkif_response_t *res, + struct blktap_request *request) +{ + int uptodate; + struct request *req; + struct blktap_device *dev; + + dev = &tap->device; + + blktap_unmap(tap, request); + + req = (struct request *)(unsigned long)request->id; + uptodate = (res->status == BLKIF_RSP_OKAY); + + BTDBG("req %p res status %d operation %d/%d id %lld\n", req, + res->status, res->operation, request->operation, + (unsigned long long)res->id); + + switch (request->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if (unlikely(res->status != BLKIF_RSP_OKAY)) + BTERR("Bad return from device data " + "request: %x\n", res->status); + blktap_device_end_dequeued_request(dev, req, uptodate, + res->status == BLKIF_RSP_OKAY ? 
0 : -EIO); + break; + default: + BUG(); + } + + blktap_request_free(tap, request); +} + +static int +blktap_prep_foreign(struct blktap *tap, + struct blktap_request *request, + blkif_request_t *blkif_req, + unsigned int seg, struct page *page, + struct blktap_grant_table *table) +{ + uint64_t ptep; + uint32_t flags; + struct page *tap_page; + struct blktap_ring *ring; + struct blkback_pagemap map; + unsigned long uvaddr, kvaddr; + + ring = &tap->ring; + map = blkback_pagemap_read(page); + blkif_req->seg[seg].gref = map.gref; + + uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg); + kvaddr = request_to_kaddr(request, seg); + flags = GNTMAP_host_map | + (request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0); + + gnttab_set_map_op(&table->grants[table->cnt], + kvaddr, flags, map.gref, map.domid); + table->cnt++; + + /* enable chained tap devices */ + tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + set_page_private(tap_page, page_private(page)); + SetPageBlkback(tap_page); + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + + if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) { + BTERR("couldn't get a pte addr!\n"); + return -1; + } + + flags |= GNTMAP_application_map | GNTMAP_contains_pte; + gnttab_set_map_op(&table->grants[table->cnt], + ptep, flags, map.gref, map.domid); + table->cnt++; + + return 0; +} + +static int +blktap_map_foreign(struct blktap *tap, + struct blktap_request *request, + blkif_request_t *blkif_req, + struct blktap_grant_table *table) +{ + struct page *page; + int i, grant, err, usr_idx; + struct blktap_ring *ring; + unsigned long uvaddr, kvaddr, foreign_mfn; + + if (!table->cnt) + return 0; + + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + table->grants, table->cnt); + BUG_ON(err); + + grant = 0; + usr_idx = request->usr_idx; + ring = &tap->ring; + + for (i = 0; i < request->nr_pages; i++) { + if (!blkif_req->seg[i].gref) + continue; + + uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i); + kvaddr = request_to_kaddr(request, i); + + if (unlikely(table->grants[grant].status)) { + BTERR("invalid kernel buffer: could not remap it\n"); + err |= 1; + table->grants[grant].handle = INVALID_GRANT_HANDLE; + } + + request->handles[i].kernel = table->grants[grant].handle; + foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT; + grant++; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + goto done; + + if (unlikely(table->grants[grant].status)) { + BTERR("invalid user buffer: could not remap it\n"); + err |= 1; + table->grants[grant].handle = INVALID_GRANT_HANDLE; + } + + request->handles[i].user = table->grants[grant].handle; + grant++; + + done: + if (err) + continue; + + page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + FOREIGN_FRAME(foreign_mfn)); + else if (vm_insert_page(ring->vma, uvaddr, page)) + err |= 1; + + BTDBG("pending_req: %p, seg: %d, page: %p, " + "kvaddr: 0x%08lx, khandle: %u, uvaddr: 0x%08lx, " + "uhandle: %u\n", request, i, page, + kvaddr, request->handles[i].kernel, + uvaddr, request->handles[i].user); + } + + return err; +} + +static int +blktap_map(struct blktap *tap, + struct blktap_request *request, + unsigned int seg, struct page *page) +{ + pte_t pte; + int usr_idx; + struct blktap_ring *ring; + unsigned long uvaddr, kvaddr; + int err = 0; + + ring = &tap->ring; + usr_idx = request->usr_idx; + uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, seg); + kvaddr = 
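/*
 * Editor's note, not part of the patch: blktap_prep_foreign() above
 * queues up to two map operations per foreign segment, a
 * GNTMAP_host_map for the kernel address and, on non-auto-translated
 * guests, a second op with GNTMAP_application_map | GNTMAP_contains_pte
 * aimed at the user pte. blktap_map_foreign() then walks the completed
 * table in the same order, consuming one or two grant entries per
 * segment, which is why its grant cursor advances independently of the
 * segment index.
 */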
request_to_kaddr(request, seg); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + pte = mk_pte(page, ring->vma->vm_page_prot); + blktap_map_uaddr(ring->vma, uvaddr, pte_mkwrite(pte)); + flush_tlb_page(ring->vma, uvaddr); + blktap_map_uaddr(NULL, kvaddr, mk_pte(page, PAGE_KERNEL)); + flush_tlb_kernel_page(kvaddr); + + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte)); + request->handles[seg].kernel = INVALID_GRANT_HANDLE; + } else { + /* grant this page access to self domain and map it. */ + domid_t domid = 0; /* XXX my domain id: grant table hypercall + doesn't understand DOMID_SELF */ + int gref; + uint32_t flags; + struct gnttab_map_grant_ref map; + struct page *tap_page; + + gref = gnttab_grant_foreign_access( + domid, page_to_pfn(page), + (request->operation == BLKIF_OP_WRITE)? + GTF_readonly: 0); + + flags = GNTMAP_host_map | + (request->operation == BLKIF_OP_WRITE ? + GNTMAP_readonly : 0); + + gnttab_set_map_op(&map, kvaddr, flags, gref, domid); + + /* enable chained tap devices */ + tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + set_page_private(tap_page, page_private(page)); + SetPageBlkback(tap_page); + + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + &map, 1); + BUG_ON(err); + + err = vm_insert_page(ring->vma, uvaddr, tap_page); + if (err) { + struct gnttab_unmap_grant_ref unmap; + gnttab_set_unmap_op(&unmap, kvaddr, + GNTMAP_host_map, gref); + VOID(HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, &unmap, 1)); + } else + request->handles[seg].kernel = gref; + } + request->handles[seg].user = INVALID_GRANT_HANDLE; + + BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, " + "uvaddr: 0x%08lx\n", request, seg, page, kvaddr, + uvaddr); + + return err; +} + +static int +blktap_device_process_request(struct blktap *tap, + struct blktap_request *request, + struct request *req) +{ + struct page *page; + int i, usr_idx, err; + struct blktap_ring *ring; + struct scatterlist *sg; + struct blktap_grant_table table; + unsigned int fsect, lsect, nr_sects; + unsigned long offset, uvaddr, kvaddr; + struct blkif_request blkif_req, *target; + + err = -1; + memset(&table, 0, sizeof(table)); + + if (!blktap_active(tap)) + goto out; + + ring = &tap->ring; + usr_idx = request->usr_idx; + blkif_req.id = usr_idx; + blkif_req.sector_number = (blkif_sector_t)blk_rq_pos(req); + blkif_req.handle = 0; + blkif_req.operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ; + + request->id = (unsigned long)req; + request->operation = blkif_req.operation; + request->status = BLKTAP_REQUEST_PENDING; + do_gettimeofday(&request->time); + + nr_sects = 0; + request->nr_pages = 0; + blkif_req.nr_segments = blk_rq_map_sg(req->q, req, tap->sg); + BUG_ON(blkif_req.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + for_each_sg(tap->sg, sg, blkif_req.nr_segments, i) { + fsect = sg->offset >> 9; + lsect = fsect + (sg->length >> 9) - 1; + nr_sects += sg->length >> 9; + + blkif_req.seg[i] = + (struct blkif_request_segment) { + .gref = 0, + .first_sect = fsect, + .last_sect = lsect }; + + if (PageBlkback(sg_page(sg))) { + /* foreign page -- use xen */ + if (blktap_prep_foreign(tap, + request, + &blkif_req, + i, + sg_page(sg), + &table)) + goto out; + } else { + /* do it the old fashioned way */ + if (blktap_map(tap, + request, + i, + sg_page(sg))) + goto out; + } + + uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i); + kvaddr = request_to_kaddr(request, i); + offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT; + page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + ring->foreign_map.map[offset] = page; + SetPageReserved(page); + + BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n", + uvaddr, page, __pa(kvaddr) >> PAGE_SHIFT); + BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, " + "page: %p, kvaddr: 0x%08lx, uvaddr: 0x%08lx\n", + offset, request, i, + page, kvaddr, uvaddr); + + request->nr_pages++; + } + + if (blktap_map_foreign(tap, request, &blkif_req, &table)) + goto out; + + /* Finally, write the request message to the user ring. */ + target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt); + memcpy(target, &blkif_req, sizeof(blkif_req)); + target->id = request->usr_idx; + wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */ + ring->ring.req_prod_pvt++; + + if (rq_data_dir(req)) { + tap->stats.st_wr_sect += nr_sects; + tap->stats.st_wr_req++; + } else { + tap->stats.st_rd_sect += nr_sects; + tap->stats.st_rd_req++; + } + + err = 0; + +out: + if (err) + blktap_device_fast_flush(tap, request); + return err; +} + +#ifdef ENABLE_PASSTHROUGH +#define rq_for_each_bio_safe(_bio, _tmp, _req) \ + if ((_req)->bio) \ + for (_bio = (_req)->bio; \ + _bio && ((_tmp = _bio->bi_next) || 1); \ + _bio = _tmp) + +static void +blktap_device_forward_request(struct blktap *tap, struct request *req) +{ + struct bio *bio, *tmp; + struct blktap_device *dev; + + dev = &tap->device; + + rq_for_each_bio_safe(bio, tmp, req) { + bio->bi_bdev = dev->bdev; + submit_bio(bio->bi_rw, bio); + } +} + +static void +blktap_device_close_bdev(struct blktap *tap) +{ + struct blktap_device *dev; + + dev = &tap->device; + + if (dev->bdev) + blkdev_put(dev->bdev); + + dev->bdev = NULL; + clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse); +} + +static int +blktap_device_open_bdev(struct blktap *tap, u32 pdev) +{ + struct block_device *bdev; + struct blktap_device *dev; + + dev = &tap->device; + + bdev = open_by_devnum(pdev, FMODE_WRITE); + if (IS_ERR(bdev)) { + BTERR("opening device %x:%x failed: %ld\n", + MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev)); + return PTR_ERR(bdev); + } + + if (!bdev->bd_disk) { + BTERR("device %x:%x doesn't exist\n", + MAJOR(pdev), MINOR(pdev)); + blkdev_put(dev->bdev); + return -ENOENT; + } + + dev->bdev = bdev; + set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse); + + /* TODO: readjust queue parameters */ + + BTINFO("set device %d to passthrough on %x:%x\n", + tap->minor, MAJOR(pdev), MINOR(pdev)); + + return 0; +} + +int 
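/*
 * Editor's note, not part of the patch: the ring update at the end of
 * blktap_device_process_request() above follows the classic
 * producer/consumer discipline: the message is copied into the shared
 * slot first, then wmb() orders those stores before the producer index
 * advances, so a consumer observing the new index always sees a
 * complete request. Reduced to its essentials:
 *
 *   memcpy(target, &blkif_req, sizeof(blkif_req));   (1) fill the slot
 *   wmb();                                           (2) payload before index
 *   ring->ring.req_prod_pvt++;                       (3) publish
 */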
+blktap_device_enable_passthrough(struct blktap *tap, + unsigned major, unsigned minor) +{ + u32 pdev; + struct blktap_device *dev; + + dev = &tap->device; + pdev = MKDEV(major, minor); + + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return -EINVAL; + + if (dev->bdev) { + if (pdev) + return -EINVAL; + blktap_device_close_bdev(tap); + return 0; + } + + return blktap_device_open_bdev(tap, pdev); +} +#endif + +/* + * dev->lock held on entry + */ +static void +blktap_device_run_queue(struct blktap *tap) +{ + int queued, err; + struct request_queue *rq; + struct request *req; + struct blktap_ring *ring; + struct blktap_device *dev; + struct blktap_request *request; + + queued = 0; + ring = &tap->ring; + dev = &tap->device; + rq = dev->gd->queue; + + BTDBG("running queue for %d\n", tap->minor); + + while ((req = blk_peek_request(rq)) != NULL) { + if (RING_FULL(&ring->ring)) { + wait: + /* Avoid pointless unplugs. */ + blk_stop_queue(rq); + blktap_defer(tap); + break; + } + + blk_start_request(req); + + if (!blk_fs_request(req)) { + __blk_end_request_all(req, -EIO); + continue; + } + + if (blk_barrier_rq(req)) { + __blk_end_request_all(req, 0); + continue; + } + +#ifdef ENABLE_PASSTHROUGH + if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) { + blktap_device_forward_request(tap, req); + continue; + } +#endif + + request = blktap_request_allocate(tap); + if (!request) { + tap->stats.st_oo_req++; + goto wait; + } + + BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%x) " + "buffer:%p [%s], pending: %p\n", req, tap->minor, + req->cmd, (unsigned long long)blk_rq_pos(req), + blk_rq_cur_sectors(req), blk_rq_sectors(req), req->buffer, + rq_data_dir(req) ? "write" : "read", request); + + spin_unlock_irq(&dev->lock); + down_read(&tap->tap_sem); + + err = blktap_device_process_request(tap, request, req); + if (!err) + queued++; + else { + blktap_device_end_dequeued_request(dev, req, 0, err); + blktap_request_free(tap, request); + } + + up_read(&tap->tap_sem); + spin_lock_irq(&dev->lock); + } + + if (queued) + blktap_ring_kick_user(tap); +} + +/* + * dev->lock held on entry + */ +static void +blktap_device_do_request(struct request_queue *rq) +{ + struct request *req; + struct blktap *tap; + struct blktap_device *dev; + + dev = rq->queuedata; + if (!dev) + goto fail; + + tap = dev_to_blktap(dev); + if (!blktap_active(tap)) + goto fail; + + if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) || + test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) { + blktap_defer(tap); + return; + } + + blktap_device_run_queue(tap); + return; + +fail: + while ((req = blk_peek_request(rq))) { + BTERR("device closed: failing secs %llu - %llu\n", + (unsigned long long)blk_rq_pos(req), + (unsigned long long)blk_rq_pos(req) + + blk_rq_cur_sectors(req)); + blk_start_request(req); + __blk_end_request_all(req, -EIO); + } +} + +void +blktap_device_restart(struct blktap *tap) +{ + struct blktap_device *dev; + + dev = &tap->device; + if (!dev->gd || !dev->gd->queue) + return; + + if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) { + blktap_defer(tap); + return; + } + + if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) || + test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) { + blktap_defer(tap); + return; + } + + spin_lock_irq(&dev->lock); + + /* Re-enable calldowns. */ + if (blk_queue_stopped(dev->gd->queue)) + blk_start_queue(dev->gd->queue); + + /* Kick things off immediately. 
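(Editor's aside, not part of the patch: calling
blktap_device_do_request() directly here is legitimate because dev->lock
is already held at this point, which is exactly the convention the block
layer itself uses when invoking the request function registered via
blk_init_queue(blktap_device_do_request, &dev->lock) in
blktap_device_create().)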
*/ + blktap_device_do_request(dev->gd->queue); + + spin_unlock_irq(&dev->lock); +} + +static void +blktap_device_configure(struct blktap *tap) +{ + struct request_queue *rq; + struct blktap_device *dev = &tap->device; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd) + return; + + dev = &tap->device; + rq = dev->gd->queue; + + spin_lock_irq(&dev->lock); + + set_capacity(dev->gd, tap->params.capacity); + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_logical_block_size(rq, tap->params.sector_size); + blk_queue_max_sectors(rq, 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. */ + blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + spin_unlock_irq(&dev->lock); +} + +int +blktap_device_resume(struct blktap *tap) +{ + int err; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap)) + return -ENODEV; + + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return 0; + + err = blktap_ring_resume(tap); + if (err) + return err; + + /* device size may have changed */ + blktap_device_configure(tap); + + BTDBG("restarting device\n"); + blktap_device_restart(tap); + + return 0; +} + +int +blktap_device_pause(struct blktap *tap) +{ + unsigned long flags; + struct blktap_device *dev = &tap->device; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap)) + return -ENODEV; + + if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) + return 0; + + spin_lock_irqsave(&dev->lock, flags); + + blk_stop_queue(dev->gd->queue); + set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse); + + spin_unlock_irqrestore(&dev->lock, flags); + + return blktap_ring_pause(tap); +} + +int +blktap_device_destroy(struct blktap *tap) +{ + struct blktap_device *dev = &tap->device; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + return 0; + + BTINFO("destroy device %d users %d\n", tap->minor, dev->users); + + if (dev->users) + return -EBUSY; + + spin_lock_irq(&dev->lock); + /* No more blktap_device_do_request(). 
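(Editor's aside, not part of the patch: the queue limits set in
blktap_device_configure() above are chosen so that any request the block
layer merges still fits a single ring slot: at most
BLKIF_MAX_SEGMENTS_PER_REQUEST segments, each confined to one aligned
page by blk_queue_max_segment_size() and blk_queue_segment_boundary().
Assuming the usual value of 11 segments and 4 KiB pages, a single
request is capped at 44 KiB of payload.)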
*/ + blk_stop_queue(dev->gd->queue); + clear_bit(BLKTAP_DEVICE, &tap->dev_inuse); + spin_unlock_irq(&dev->lock); + +#ifdef ENABLE_PASSTHROUGH + if (dev->bdev) + blktap_device_close_bdev(tap); +#endif + + del_gendisk(dev->gd); + put_disk(dev->gd); + blk_cleanup_queue(dev->gd->queue); + + dev->gd = NULL; + + wake_up(&tap->wq); + + return 0; +} + +int +blktap_device_create(struct blktap *tap) +{ + int minor, err; + struct gendisk *gd; + struct request_queue *rq; + struct blktap_device *dev; + + gd = NULL; + rq = NULL; + dev = &tap->device; + minor = tap->minor; + + if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + return -EEXIST; + + if (blktap_validate_params(tap, &tap->params)) + return -EINVAL; + + BTINFO("minor %d sectors %Lu sector-size %lu\n", + minor, tap->params.capacity, tap->params.sector_size); + + err = -ENODEV; + + gd = alloc_disk(1); + if (!gd) + goto error; + + if (minor < 26) + sprintf(gd->disk_name, "tapdev%c", 'a' + minor); + else + sprintf(gd->disk_name, "tapdev%c%c", + 'a' + ((minor / 26) - 1), 'a' + (minor % 26)); + + gd->major = blktap_device_major; + gd->first_minor = minor; + gd->fops = &blktap_device_file_operations; + gd->private_data = dev; + + spin_lock_init(&dev->lock); + rq = blk_init_queue(blktap_device_do_request, &dev->lock); + if (!rq) + goto error; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) + elevator_init(rq, "noop"); +#else + elevator_init(rq, &elevator_noop); +#endif + + gd->queue = rq; + rq->queuedata = dev; + dev->gd = gd; + + set_bit(BLKTAP_DEVICE, &tap->dev_inuse); + blktap_device_configure(tap); + + add_disk(gd); + + err = 0; + goto out; + + error: + if (gd) + del_gendisk(gd); + if (rq) + blk_cleanup_queue(rq); + + out: + BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err); + return err; +} + +int +blktap_device_init(int *maj) +{ + int major; + + /* Dynamically allocate a major for this device */ + major = register_blkdev(0, "tapdev"); + if (major < 0) { + BTERR("Couldn't register blktap device\n"); + return -ENOMEM; + } + + blktap_device_major = *maj = major; + BTINFO("blktap device major %d\n", major); + + return 0; +} + +void +blktap_device_free(void) +{ + if (blktap_device_major) + unregister_blkdev(blktap_device_major, "tapdev"); +} --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/request.c +++ linux-ec2-2.6.31/drivers/xen/blktap2/request.c @@ -0,0 +1,297 @@ +#include +#include + +#include "blktap.h" + +#define MAX_BUCKETS 8 +#define BUCKET_SIZE MAX_PENDING_REQS + +#define BLKTAP_POOL_CLOSING 1 + +struct blktap_request_bucket; + +struct blktap_request_handle { + int slot; + uint8_t inuse; + struct blktap_request request; + struct blktap_request_bucket *bucket; +}; + +struct blktap_request_bucket { + atomic_t reqs_in_use; + struct blktap_request_handle handles[BUCKET_SIZE]; + struct page **foreign_pages; +}; + +struct blktap_request_pool { + spinlock_t lock; + uint8_t status; + struct list_head free_list; + atomic_t reqs_in_use; + wait_queue_head_t wait_queue; + struct blktap_request_bucket *buckets[MAX_BUCKETS]; +}; + +static struct blktap_request_pool pool; + +static inline struct blktap_request_handle * +blktap_request_to_handle(struct blktap_request *req) +{ + return container_of(req, struct blktap_request_handle, request); +} + +static void +blktap_request_pool_init_request(struct blktap_request *request) +{ + int i; + + request->usr_idx = -1; + request->nr_pages = 0; + request->status = BLKTAP_REQUEST_FREE; + INIT_LIST_HEAD(&request->free_list); + for (i = 0; i < ARRAY_SIZE(request->handles); i++) { + 
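/*
 * Editor's note, not part of the patch, a worked example of the disk
 * naming in blktap_device_create() above: minors 0-25 map to
 * tapdeva..tapdevz, and from minor 26 on a two-letter suffix is used,
 * so minor 26 becomes "tapdevaa" and minor 27 becomes "tapdevab"
 * ('a' + (27 / 26 - 1) = 'a', 'a' + 27 % 26 = 'b').
 */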
request->handles[i].user = INVALID_GRANT_HANDLE; + request->handles[i].kernel = INVALID_GRANT_HANDLE; + } +} + +static int +blktap_request_pool_allocate_bucket(void) +{ + int i, idx; + unsigned long flags; + struct blktap_request *request; + struct blktap_request_handle *handle; + struct blktap_request_bucket *bucket; + + bucket = kzalloc(sizeof(struct blktap_request_bucket), GFP_KERNEL); + if (!bucket) + goto fail; + + bucket->foreign_pages = alloc_empty_pages_and_pagevec(MMAP_PAGES); + if (!bucket->foreign_pages) + goto fail; + + spin_lock_irqsave(&pool.lock, flags); + + idx = -1; + for (i = 0; i < MAX_BUCKETS; i++) { + if (!pool.buckets[i]) { + idx = i; + pool.buckets[idx] = bucket; + break; + } + } + + if (idx == -1) { + spin_unlock_irqrestore(&pool.lock, flags); + goto fail; + } + + for (i = 0; i < BUCKET_SIZE; i++) { + handle = bucket->handles + i; + request = &handle->request; + + handle->slot = i; + handle->inuse = 0; + handle->bucket = bucket; + + blktap_request_pool_init_request(request); + list_add_tail(&request->free_list, &pool.free_list); + } + + spin_unlock_irqrestore(&pool.lock, flags); + + return 0; + +fail: + if (bucket && bucket->foreign_pages) + free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES); + kfree(bucket); + return -ENOMEM; +} + +static void +blktap_request_pool_free_bucket(struct blktap_request_bucket *bucket) +{ + if (!bucket) + return; + + BTDBG("freeing bucket %p\n", bucket); + + free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES); + kfree(bucket); +} + +unsigned long +request_to_kaddr(struct blktap_request *req, int seg) +{ + struct blktap_request_handle *handle = blktap_request_to_handle(req); + int idx = handle->slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + unsigned long pfn = page_to_pfn(handle->bucket->foreign_pages[idx]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +int +blktap_request_pool_shrink(void) +{ + int i, err; + unsigned long flags; + struct blktap_request_bucket *bucket; + + err = -EAGAIN; + + spin_lock_irqsave(&pool.lock, flags); + + /* always keep at least one bucket */ + for (i = 1; i < MAX_BUCKETS; i++) { + bucket = pool.buckets[i]; + if (!bucket) + continue; + + if (atomic_read(&bucket->reqs_in_use)) + continue; + + blktap_request_pool_free_bucket(bucket); + pool.buckets[i] = NULL; + err = 0; + break; + } + + spin_unlock_irqrestore(&pool.lock, flags); + + return err; +} + +int +blktap_request_pool_grow(void) +{ + return blktap_request_pool_allocate_bucket(); +} + +struct blktap_request * +blktap_request_allocate(struct blktap *tap) +{ + int i; + uint16_t usr_idx; + unsigned long flags; + struct blktap_request *request; + + usr_idx = -1; + request = NULL; + + spin_lock_irqsave(&pool.lock, flags); + + if (pool.status == BLKTAP_POOL_CLOSING) + goto out; + + for (i = 0; i < ARRAY_SIZE(tap->pending_requests); i++) + if (!tap->pending_requests[i]) { + usr_idx = i; + break; + } + + if (usr_idx == (uint16_t)-1) + goto out; + + if (!list_empty(&pool.free_list)) { + request = list_entry(pool.free_list.next, + struct blktap_request, free_list); + list_del(&request->free_list); + } + + if (request) { + struct blktap_request_handle *handle; + + atomic_inc(&pool.reqs_in_use); + + handle = blktap_request_to_handle(request); + atomic_inc(&handle->bucket->reqs_in_use); + handle->inuse = 1; + + request->usr_idx = usr_idx; + + tap->pending_requests[usr_idx] = request; + tap->pending_cnt++; + } + +out: + spin_unlock_irqrestore(&pool.lock, flags); + return request; +} + +void +blktap_request_free(struct blktap *tap, struct 
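/*
 * Editor's note, not part of the patch: request_to_kaddr() above
 * flattens (slot, segment) into the bucket's page array as
 * idx = slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg, so every request
 * slot owns a contiguous run of preallocated pages. Assuming 11
 * segments per request, slot 3 / segment 2 resolves to
 * foreign_pages[35], and a bucket of BUCKET_SIZE requests needs
 * BUCKET_SIZE * 11 such pages, which is what the MMAP_PAGES allocation
 * in blktap_request_pool_allocate_bucket() provides.
 */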
blktap_request *request) +{ + int free; + unsigned long flags; + struct blktap_request_handle *handle; + + BUG_ON(request->usr_idx >= ARRAY_SIZE(tap->pending_requests)); + handle = blktap_request_to_handle(request); + + spin_lock_irqsave(&pool.lock, flags); + + handle->inuse = 0; + tap->pending_requests[request->usr_idx] = NULL; + blktap_request_pool_init_request(request); + list_add(&request->free_list, &pool.free_list); + atomic_dec(&handle->bucket->reqs_in_use); + free = atomic_dec_and_test(&pool.reqs_in_use); + + spin_unlock_irqrestore(&pool.lock, flags); + + if (--tap->pending_cnt == 0) + wake_up_interruptible(&tap->wq); + + if (free) + wake_up(&pool.wait_queue); +} + +void +blktap_request_pool_free(void) +{ + int i; + unsigned long flags; + + spin_lock_irqsave(&pool.lock, flags); + + pool.status = BLKTAP_POOL_CLOSING; + while (atomic_read(&pool.reqs_in_use)) { + spin_unlock_irqrestore(&pool.lock, flags); + wait_event(pool.wait_queue, !atomic_read(&pool.reqs_in_use)); + spin_lock_irqsave(&pool.lock, flags); + } + + for (i = 0; i < MAX_BUCKETS; i++) { + blktap_request_pool_free_bucket(pool.buckets[i]); + pool.buckets[i] = NULL; + } + + spin_unlock_irqrestore(&pool.lock, flags); +} + +int +blktap_request_pool_init(void) +{ + int i, err; + + memset(&pool, 0, sizeof(pool)); + + spin_lock_init(&pool.lock); + INIT_LIST_HEAD(&pool.free_list); + atomic_set(&pool.reqs_in_use, 0); + init_waitqueue_head(&pool.wait_queue); + + for (i = 0; i < 2; i++) { + err = blktap_request_pool_allocate_bucket(); + if (err) + goto fail; + } + + return 0; + +fail: + blktap_request_pool_free(); + return err; +} --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/control.c +++ linux-ec2-2.6.31/drivers/xen/blktap2/control.c @@ -0,0 +1,278 @@ +#include +#include + +#include "blktap.h" + +static DEFINE_SPINLOCK(blktap_control_lock); +struct blktap *blktaps[MAX_BLKTAP_DEVICE]; + +static int ring_major; +static int device_major; +static int blktap_control_registered; + +static void +blktap_control_initialize_tap(struct blktap *tap) +{ + int minor = tap->minor; + + memset(tap, 0, sizeof(*tap)); + set_bit(BLKTAP_CONTROL, &tap->dev_inuse); + init_rwsem(&tap->tap_sem); + sg_init_table(tap->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + init_waitqueue_head(&tap->wq); + atomic_set(&tap->refcnt, 0); + + tap->minor = minor; +} + +static struct blktap * +blktap_control_create_tap(void) +{ + int minor; + struct blktap *tap; + + tap = kmalloc(sizeof(*tap), GFP_KERNEL); + if (unlikely(!tap)) + return NULL; + + blktap_control_initialize_tap(tap); + + spin_lock_irq(&blktap_control_lock); + for (minor = 0; minor < MAX_BLKTAP_DEVICE; minor++) + if (!blktaps[minor]) + break; + + if (minor == MAX_BLKTAP_DEVICE) { + kfree(tap); + tap = NULL; + goto out; + } + + tap->minor = minor; + blktaps[minor] = tap; + +out: + spin_unlock_irq(&blktap_control_lock); + return tap; +} + +static struct blktap * +blktap_control_allocate_tap(void) +{ + int err, minor; + struct blktap *tap; + + /* + * This is called only from the ioctl, which + * means we should always have interrupts enabled. 
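(Editor's aside, not part of the patch: the BUG_ON below enforces this
because the function takes blktap_control_lock with spin_lock_irq() and
spin_unlock_irq(), which unconditionally re-enables interrupts on
unlock; entering with interrupts already disabled would therefore
silently re-enable them behind the caller's back.)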
+ */ + BUG_ON(irqs_disabled()); + + spin_lock_irq(&blktap_control_lock); + + for (minor = 0; minor < MAX_BLKTAP_DEVICE; minor++) { + tap = blktaps[minor]; + if (!tap) + goto found; + + if (!tap->dev_inuse) { + blktap_control_initialize_tap(tap); + goto found; + } + } + + tap = NULL; + +found: + spin_unlock_irq(&blktap_control_lock); + + if (!tap) { + tap = blktap_control_create_tap(); + if (!tap) + return NULL; + } + + err = blktap_ring_create(tap); + if (err) { + BTERR("ring creation failed: %d\n", err); + clear_bit(BLKTAP_CONTROL, &tap->dev_inuse); + return NULL; + } + + BTINFO("allocated tap %p\n", tap); + return tap; +} + +static int +blktap_control_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + unsigned long dev; + struct blktap *tap; + + switch (cmd) { + case BLKTAP2_IOCTL_ALLOC_TAP: { + struct blktap_handle h; + + tap = blktap_control_allocate_tap(); + if (!tap) { + BTERR("error allocating device\n"); + return -ENOMEM; + } + + h.ring = ring_major; + h.device = device_major; + h.minor = tap->minor; + + if (copy_to_user((struct blktap_handle __user *)arg, + &h, sizeof(h))) { + blktap_control_destroy_device(tap); + return -EFAULT; + } + + return 0; + } + + case BLKTAP2_IOCTL_FREE_TAP: + dev = arg; + + if (dev >= MAX_BLKTAP_DEVICE || !blktaps[dev]) + return -EINVAL; + + blktap_control_destroy_device(blktaps[dev]); + return 0; + } + + return -ENOIOCTLCMD; +} + +static struct file_operations blktap_control_file_operations = { + .owner = THIS_MODULE, + .ioctl = blktap_control_ioctl, +}; + +static struct miscdevice blktap_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "blktap-control", + .fops = &blktap_control_file_operations, +}; + +int +blktap_control_destroy_device(struct blktap *tap) +{ + int err; + unsigned long inuse; + + if (!tap) + return 0; + + set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse); + + for (;;) { + inuse = tap->dev_inuse; + err = blktap_device_destroy(tap); + if (err) + goto wait; + + inuse = tap->dev_inuse; + err = blktap_ring_destroy(tap); + if (err) + goto wait; + + inuse = tap->dev_inuse; + err = blktap_sysfs_destroy(tap); + if (err) + goto wait; + + break; + + wait: + BTDBG("inuse: 0x%lx, dev_inuse: 0x%lx\n", + inuse, tap->dev_inuse); + if (wait_event_interruptible(tap->wq, tap->dev_inuse != inuse)) + break; + } + + clear_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse); + + if (tap->dev_inuse == (1UL << BLKTAP_CONTROL)) { + err = 0; + clear_bit(BLKTAP_CONTROL, &tap->dev_inuse); + } + + return err; +} + +static int +blktap_control_init(void) +{ + int err; + + err = misc_register(&blktap_misc); + if (err) { + BTERR("misc_register failed for control device"); + return err; + } + + blktap_control_registered = 1; + return 0; +} + +static void +blktap_control_free(void) +{ + int i; + + for (i = 0; i < MAX_BLKTAP_DEVICE; i++) + blktap_control_destroy_device(blktaps[i]); + + if (blktap_control_registered) + if (misc_deregister(&blktap_misc) < 0) + BTERR("misc_deregister failed for control device"); +} + +static void +blktap_exit(void) +{ + blktap_control_free(); + blktap_ring_free(); + blktap_sysfs_free(); + blktap_device_free(); + blktap_request_pool_free(); +} + +static int __init +blktap_init(void) +{ + int err; + + err = blktap_request_pool_init(); + if (err) + return err; + + err = blktap_device_init(&device_major); + if (err) + goto fail; + + err = blktap_ring_init(&ring_major); + if (err) + goto fail; + + err = blktap_sysfs_init(); + if (err) + goto fail; + + err = blktap_control_init(); + if (err) + goto fail; + +
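/*
 * Editor's note, not part of the patch: a minimal user-space sketch of
 * the control interface assembled above. It assumes the misc device
 * shows up as /dev/blktap-control (the miscdevice is registered under
 * the name "blktap-control"), that BLKTAP2_IOCTL_ALLOC_TAP and struct
 * blktap_handle come from the driver's blktap.h header, and that the
 * handle's ring/device/minor fields are unsigned; none of that is
 * spelled out in this hunk:
 *
 *   #include <fcntl.h>
 *   #include <stdio.h>
 *   #include <unistd.h>
 *   #include <sys/ioctl.h>
 *
 *   int allocate_tap(void)
 *   {
 *       struct blktap_handle h;
 *       int minor, fd = open("/dev/blktap-control", O_RDWR);
 *
 *       if (fd < 0)
 *           return -1;
 *       if (ioctl(fd, BLKTAP2_IOCTL_ALLOC_TAP, &h) < 0) {
 *           close(fd);
 *           return -1;
 *       }
 *       printf("ring %u:%u, block device %u:%u (tapdev%u)\n",
 *              h.ring, h.minor, h.device, h.minor, h.minor);
 *       minor = h.minor;
 *       close(fd);
 *       return minor;
 *   }
 */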
return 0; + +fail: + blktap_exit(); + return err; +} + +module_init(blktap_init); +module_exit(blktap_exit); +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/sysfs.c +++ linux-ec2-2.6.31/drivers/xen/blktap2/sysfs.c @@ -0,0 +1,437 @@ +#include +#include +#include + +#include "blktap.h" + +int blktap_debug_level = 1; + +static struct class *class; +static DECLARE_WAIT_QUEUE_HEAD(sysfs_wq); + +static inline void +blktap_sysfs_get(struct blktap *tap) +{ + atomic_inc(&tap->ring.sysfs_refcnt); +} + +static inline void +blktap_sysfs_put(struct blktap *tap) +{ + if (atomic_dec_and_test(&tap->ring.sysfs_refcnt)) + wake_up(&sysfs_wq); +} + +static inline void +blktap_sysfs_enter(struct blktap *tap) +{ + blktap_sysfs_get(tap); /* pin sysfs device */ + mutex_lock(&tap->ring.sysfs_mutex); /* serialize sysfs operations */ +} + +static inline void +blktap_sysfs_exit(struct blktap *tap) +{ + mutex_unlock(&tap->ring.sysfs_mutex); + blktap_sysfs_put(tap); +} + +static ssize_t blktap_sysfs_pause_device(struct device *, + struct device_attribute *, + const char *, size_t); +DEVICE_ATTR(pause, S_IWUSR, NULL, blktap_sysfs_pause_device); +static ssize_t blktap_sysfs_resume_device(struct device *, + struct device_attribute *, + const char *, size_t); +DEVICE_ATTR(resume, S_IWUSR, NULL, blktap_sysfs_resume_device); + +static ssize_t +blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int err; + struct blktap *tap = dev_get_drvdata(dev); + + blktap_sysfs_enter(tap); + + if (!tap->ring.dev || + test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) { + err = -ENODEV; + goto out; + } + + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) { + err = -EPERM; + goto out; + } + + if (size > BLKTAP2_MAX_MESSAGE_LEN) { + err = -ENAMETOOLONG; + goto out; + } + + if (strnlen(buf, BLKTAP2_MAX_MESSAGE_LEN) >= BLKTAP2_MAX_MESSAGE_LEN) { + err = -EINVAL; + goto out; + } + + snprintf(tap->params.name, sizeof(tap->params.name) - 1, "%s", buf); + err = size; + +out: + blktap_sysfs_exit(tap); + return err; +} + +static ssize_t +blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ssize_t size; + struct blktap *tap = dev_get_drvdata(dev); + + blktap_sysfs_enter(tap); + + if (!tap->ring.dev) + size = -ENODEV; + else if (tap->params.name[0]) + size = sprintf(buf, "%s\n", tap->params.name); + else + size = sprintf(buf, "%d\n", tap->minor); + + blktap_sysfs_exit(tap); + + return size; +} +DEVICE_ATTR(name, S_IRUSR | S_IWUSR, + blktap_sysfs_get_name, blktap_sysfs_set_name); + +static ssize_t +blktap_sysfs_remove_device(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int err; + struct blktap *tap = dev_get_drvdata(dev); + + if (!tap->ring.dev) + return size; + + if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + return -EBUSY; + + err = blktap_control_destroy_device(tap); + + return (err ? 
: size); +} +DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); + +static ssize_t +blktap_sysfs_pause_device(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int err; + struct blktap *tap = dev_get_drvdata(dev); + + blktap_sysfs_enter(tap); + + BTDBG("pausing %u:%u: dev_inuse: %lu\n", + MAJOR(tap->ring.devno), MINOR(tap->ring.devno), tap->dev_inuse); + + if (!tap->ring.dev || + test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) { + err = -ENODEV; + goto out; + } + + if (test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) { + err = -EBUSY; + goto out; + } + + if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) { + err = 0; + goto out; + } + + err = blktap_device_pause(tap); + if (!err) { + device_remove_file(dev, &dev_attr_pause); + err = device_create_file(dev, &dev_attr_resume); + } + +out: + blktap_sysfs_exit(tap); + + return (err ? err : size); +} + +static ssize_t +blktap_sysfs_resume_device(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int err; + struct blktap *tap = dev_get_drvdata(dev); + + blktap_sysfs_enter(tap); + + if (!tap->ring.dev || + test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) { + err = -ENODEV; + goto out; + } + + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) { + err = -EINVAL; + goto out; + } + + err = blktap_device_resume(tap); + if (!err) { + device_remove_file(dev, &dev_attr_resume); + err = device_create_file(dev, &dev_attr_pause); + } + +out: + blktap_sysfs_exit(tap); + + BTDBG("returning %zd\n", (err ? err : size)); + return (err ? err : size); +} + +#ifdef ENABLE_PASSTHROUGH +static ssize_t +blktap_sysfs_enable_passthrough(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int err; + unsigned major, minor; + struct blktap *tap = dev_get_drvdata(dev); + + BTINFO("passthrough request enabled\n"); + + blktap_sysfs_enter(tap); + + if (!tap->ring.dev || + test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) { + err = -ENODEV; + goto out; + } + + if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) { + err = -EINVAL; + goto out; + } + + if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) { + err = -EINVAL; + goto out; + } + + err = sscanf(buf, "%x:%x", &major, &minor); + if (err != 2) { + err = -EINVAL; + goto out; + } + + err = blktap_device_enable_passthrough(tap, major, minor); + +out: + blktap_sysfs_exit(tap); + BTDBG("returning %d\n", (err ? err : size)); + return (err ? 
err : size); +} +#endif + +static ssize_t +blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, + char *buf) +{ + char *tmp; + int i, ret; + struct blktap *tap = dev_get_drvdata(dev); + + tmp = buf; + blktap_sysfs_get(tap); + + if (!tap->ring.dev) { + ret = sprintf(tmp, "no device\n"); + goto out; + } + + tmp += sprintf(tmp, "%s (%u:%u), refcnt: %d, dev_inuse: 0x%08lx\n", + tap->params.name, MAJOR(tap->ring.devno), + MINOR(tap->ring.devno), atomic_read(&tap->refcnt), + tap->dev_inuse); + tmp += sprintf(tmp, "capacity: 0x%llx, sector size: 0x%lx, " + "device users: %d\n", tap->params.capacity, + tap->params.sector_size, tap->device.users); + + down_read(&tap->tap_sem); + + tmp += sprintf(tmp, "pending requests: %d\n", tap->pending_cnt); + for (i = 0; i < MAX_PENDING_REQS; i++) { + struct blktap_request *req = tap->pending_requests[i]; + if (!req) + continue; + + tmp += sprintf(tmp, "req %d: id: %llu, usr_idx: %d, " + "status: 0x%02x, pendcnt: %d, " + "nr_pages: %u, op: %d, time: %lu:%lu\n", + i, (unsigned long long)req->id, req->usr_idx, + req->status, atomic_read(&req->pendcnt), + req->nr_pages, req->operation, req->time.tv_sec, + req->time.tv_usec); + } + + up_read(&tap->tap_sem); + ret = (tmp - buf) + 1; + +out: + blktap_sysfs_put(tap); + BTDBG("%s\n", buf); + + return ret; +} +DEVICE_ATTR(debug, S_IRUSR, blktap_sysfs_debug_device, NULL); + +int +blktap_sysfs_create(struct blktap *tap) +{ + struct blktap_ring *ring; + struct device *dev; + + if (!class) + return -ENODEV; + + ring = &tap->ring; + + dev = device_create(class, NULL, ring->devno, tap, + "blktap%d", tap->minor); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ring->dev = dev; + + mutex_init(&ring->sysfs_mutex); + atomic_set(&ring->sysfs_refcnt, 0); + set_bit(BLKTAP_SYSFS, &tap->dev_inuse); + + if (device_create_file(dev, &dev_attr_name) || + device_create_file(dev, &dev_attr_remove) || + device_create_file(dev, &dev_attr_pause) || + device_create_file(dev, &dev_attr_debug)) + printk(KERN_WARNING + "One or more attribute files not created for blktap%d\n", + tap->minor); + + return 0; +} + +int +blktap_sysfs_destroy(struct blktap *tap) +{ + struct blktap_ring *ring; + struct device *dev; + + ring = &tap->ring; + dev = ring->dev; + if (!class || !dev) + return 0; + + ring->dev = NULL; + if (wait_event_interruptible(sysfs_wq, + !atomic_read(&tap->ring.sysfs_refcnt))) + return -EAGAIN; + + /* XXX: is it safe to remove the class from a sysfs attribute?
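(Editor's aside, not part of the patch: the ordering above is what makes
this teardown safe in practice. ring->dev is cleared first, every
attribute handler that races in bails out with -ENODEV once it sees a
NULL ring->dev, and the wait on sysfs_refcnt lets in-flight show/store
callbacks drain through blktap_sysfs_put() before the attribute files
and the device itself are torn down below.)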
*/ + device_remove_file(dev, &dev_attr_name); + device_remove_file(dev, &dev_attr_remove); + device_remove_file(dev, &dev_attr_pause); + device_remove_file(dev, &dev_attr_resume); + device_remove_file(dev, &dev_attr_debug); + device_destroy(class, ring->devno); + + clear_bit(BLKTAP_SYSFS, &tap->dev_inuse); + + return 0; +} + +static ssize_t +blktap_sysfs_show_verbosity(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", blktap_debug_level); +} + +static ssize_t +blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size) +{ + int level; + + if (sscanf(buf, "%d", &level) == 1) { + blktap_debug_level = level; + return size; + } + + return -EINVAL; +} +CLASS_ATTR(verbosity, S_IRUSR | S_IWUSR, + blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity); + +static ssize_t +blktap_sysfs_show_devices(struct class *class, char *buf) +{ + int i, ret; + struct blktap *tap; + + ret = 0; + for (i = 0; i < MAX_BLKTAP_DEVICE; i++) { + tap = blktaps[i]; + if (!tap) + continue; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + continue; + + ret += sprintf(buf + ret, "%d ", tap->minor); + ret += snprintf(buf + ret, sizeof(tap->params.name) - 1, + tap->params.name); + ret += sprintf(buf + ret, "\n"); + } + + return ret; +} +CLASS_ATTR(devices, S_IRUSR, blktap_sysfs_show_devices, NULL); + +void +blktap_sysfs_free(void) +{ + if (!class) + return; + + class_remove_file(class, &class_attr_verbosity); + class_remove_file(class, &class_attr_devices); + + class_destroy(class); +} + +int +blktap_sysfs_init(void) +{ + struct class *cls; + + if (class) + return -EEXIST; + + cls = class_create(THIS_MODULE, "blktap2"); + if (IS_ERR(cls)) + return PTR_ERR(cls); + + if (class_create_file(cls, &class_attr_verbosity) || + class_create_file(cls, &class_attr_devices)) + printk(KERN_WARNING "blktap2: One or more " + "class attribute files could not be created.\n"); + + class = cls; + return 0; +} --- linux-ec2-2.6.31.orig/drivers/xen/blktap2/Makefile +++ linux-ec2-2.6.31/drivers/xen/blktap2/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_XEN_BLKDEV_TAP2) := blktap2.o + +blktap2-y := control.o ring.o wait_queue.o device.o request.o +blktap2-$(CONFIG_SYSFS) += sysfs.o --- linux-ec2-2.6.31.orig/drivers/xen/balloon/balloon.c +++ linux-ec2-2.6.31/drivers/xen/balloon/balloon.c @@ -0,0 +1,752 @@ +/****************************************************************************** + * balloon.c + * + * Xen balloon driver - enables returning/claiming memory to/from Xen. + * + * Copyright (c) 2003, B Dragovic + * Copyright (c) 2003-2004, M Williamson, K Fraser + * Copyright (c) 2005 Dan M. Smith, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *balloon_pde; +#endif + +static DEFINE_MUTEX(balloon_mutex); + +/* + * Protects atomic reservation decrease/increase against concurrent increases. + * Also protects non-atomic updates of current_pages and driver_pages, and + * balloon lists. + */ +DEFINE_SPINLOCK(balloon_lock); + +struct balloon_stats balloon_stats; + +/* We increase/decrease in batches which fit in a page */ +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; + +#ifdef CONFIG_HIGHMEM +#define inc_totalhigh_pages() (totalhigh_pages++) +#define dec_totalhigh_pages() (totalhigh_pages--) +#else +#define inc_totalhigh_pages() ((void)0) +#define dec_totalhigh_pages() ((void)0) +#endif + +/* List of ballooned pages, threaded through the mem_map array. */ +static LIST_HEAD(ballooned_pages); + +/* Main work function, always executed in process context. */ +static void balloon_process(struct work_struct *unused); +static DECLARE_WORK(balloon_worker, balloon_process); +static struct timer_list balloon_timer; + +/* When ballooning out (allocating memory to return to Xen) we don't really + want the kernel to try too hard since that can trigger the oom killer. */ +#define GFP_BALLOON \ + (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD) + +#define PAGE_TO_LIST(p) (&(p)->lru) +#define LIST_TO_PAGE(l) list_entry((l), struct page, lru) +#define UNLIST_PAGE(p) \ + do { \ + list_del(PAGE_TO_LIST(p)); \ + PAGE_TO_LIST(p)->next = NULL; \ + PAGE_TO_LIST(p)->prev = NULL; \ + } while(0) + +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_mem: " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "xen_mem: " fmt, ##args) + +/* balloon_append: add the given page to the balloon. */ +static void balloon_append(struct page *page, int account) +{ + unsigned long pfn; + + /* Lowmem is re-populated first, so highmem pages go at list tail. */ + if (PageHighMem(page)) { + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); + bs.balloon_high++; + if (account) + dec_totalhigh_pages(); + } else { + list_add(PAGE_TO_LIST(page), &ballooned_pages); + bs.balloon_low++; + } + + pfn = page_to_pfn(page); + if (account) { + SetPageReserved(page); + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + page_zone(page)->present_pages--; + } else { + BUG_ON(!PageReserved(page)); + WARN_ON_ONCE(phys_to_machine_mapping_valid(pfn)); + } +} + +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
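(Editor's aside, not part of the patch: because balloon_append() above
queues highmem pages at the tail of ballooned_pages and lowmem pages at
the head, this head-first retrieval hands back lowmem first, matching
the "Lowmem is re-populated first" remark, so kernel-addressable memory
is restored before highmem whenever the balloon shrinks.)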
*/ +static struct page *balloon_retrieve(int *was_empty) +{ + struct page *page; + struct zone *zone; + + if (list_empty(&ballooned_pages)) + return NULL; + + page = LIST_TO_PAGE(ballooned_pages.next); + UNLIST_PAGE(page); + BUG_ON(!PageReserved(page)); + + if (PageHighMem(page)) { + bs.balloon_high--; + inc_totalhigh_pages(); + } + else + bs.balloon_low--; + zone = page_zone(page); + *was_empty |= !populated_zone(zone); + zone->present_pages++; + + return page; +} + +static struct page *balloon_first_page(void) +{ + if (list_empty(&ballooned_pages)) + return NULL; + return LIST_TO_PAGE(ballooned_pages.next); +} + +static struct page *balloon_next_page(struct page *page) +{ + struct list_head *next = PAGE_TO_LIST(page)->next; + if (next == &ballooned_pages) + return NULL; + return LIST_TO_PAGE(next); +} + +static inline void balloon_free_page(struct page *page) +{ +#ifndef MODULE + if (put_page_testzero(page)) + free_cold_page(page); +#else + /* free_cold_page() is not being exported. */ + __free_page(page); +#endif +} + +static void balloon_alarm(unsigned long unused) +{ + schedule_work(&balloon_worker); +} + +static unsigned long current_target(void) +{ + unsigned long target = bs.target_pages; + if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) + target = bs.current_pages + bs.balloon_low + bs.balloon_high; + return target; +} + +unsigned long balloon_minimum_target(void) +{ +#ifndef CONFIG_XEN +#define max_pfn num_physpages +#endif + unsigned long min_pages, curr_pages = current_target(); + +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) + /* Simple continuous piecewise linear function: + * max MiB -> min MiB gradient + * 0 0 + * 16 16 + * 32 24 + * 128 72 (1/2) + * 512 168 (1/4) + * 2048 360 (1/8) + * 8192 552 (1/32) + * 32768 1320 + * 131072 4392 + */ + if (max_pfn < MB2PAGES(128)) + min_pages = MB2PAGES(8) + (max_pfn >> 1); + else if (max_pfn < MB2PAGES(512)) + min_pages = MB2PAGES(40) + (max_pfn >> 2); + else if (max_pfn < MB2PAGES(2048)) + min_pages = MB2PAGES(104) + (max_pfn >> 3); + else + min_pages = MB2PAGES(296) + (max_pfn >> 5); +#undef MB2PAGES + + /* Don't enforce growth */ + return min(min_pages, curr_pages); +#ifndef CONFIG_XEN +#undef max_pfn +#endif +} + +static int increase_reservation(unsigned long nr_pages) +{ + unsigned long pfn, i, flags; + struct page *page; + long rc; + int need_zonelists_rebuild = 0; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + balloon_lock(flags); + + page = balloon_first_page(); + for (i = 0; i < nr_pages; i++) { + BUG_ON(page == NULL); + frame_list[i] = page_to_pfn(page); + page = balloon_next_page(page); + } + + set_xen_guest_handle(reservation.extent_start, frame_list); + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + if (rc < 0) + goto out; + + for (i = 0; i < rc; i++) { + page = balloon_retrieve(&need_zonelists_rebuild); + BUG_ON(page == NULL); + + pfn = page_to_pfn(page); + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && + phys_to_machine_mapping_valid(pfn)); + + set_phys_to_machine(pfn, frame_list[i]); + +#ifdef CONFIG_XEN + /* Link back into the page tables if not highmem.
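(Editor's aside, not part of the patch: on paravirtualized x86 the
kernel's 1:1 lowmem mapping still points at the machine frame that was
given back to Xen, so after the P2M update the pte must be rewritten via
HYPERVISOR_update_va_mapping() to reference the newly populated frame;
highmem pages have no permanent kernel mapping and need no fix-up. As a
check on the balloon_minimum_target() table further up: a domain with
max_pfn equivalent to 2048 MiB takes the final branch, giving
MB2PAGES(296) + max_pfn/32 = 296 MiB + 64 MiB = 360 MiB, the "2048 ->
360" row of the comment.)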
*/ + if (pfn < max_low_pfn) { + int ret; + ret = HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + pfn_pte_ma(frame_list[i], PAGE_KERNEL), + 0); + BUG_ON(ret); + } +#endif + + /* Relinquish the page back to the allocator. */ + ClearPageReserved(page); + init_page_count(page); + balloon_free_page(page); + } + + bs.current_pages += rc; + totalram_pages = bs.current_pages; + + out: + balloon_unlock(flags); + +#ifndef MODULE + setup_per_zone_wmarks(); + if (rc > 0) + kswapd_run(0); + if (need_zonelists_rebuild) + build_all_zonelists(); + else + vm_total_pages = nr_free_pagecache_pages(); +#endif + + return rc < 0 ? rc : rc != nr_pages; +} + +static int decrease_reservation(unsigned long nr_pages) +{ + unsigned long pfn, i, flags; + struct page *page; + void *v; + int need_sleep = 0; + int ret; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + for (i = 0; i < nr_pages; i++) { + if ((page = alloc_page(GFP_BALLOON)) == NULL) { + nr_pages = i; + need_sleep = 1; + break; + } + + pfn = page_to_pfn(page); + frame_list[i] = pfn_to_mfn(pfn); + + if (!PageHighMem(page)) { + v = phys_to_virt(pfn << PAGE_SHIFT); + scrub_pages(v, 1); +#ifdef CONFIG_XEN + ret = HYPERVISOR_update_va_mapping( + (unsigned long)v, __pte_ma(0), 0); + BUG_ON(ret); +#endif + } +#ifdef CONFIG_XEN_SCRUB_PAGES + else { + v = kmap(page); + scrub_pages(v, 1); + kunmap(page); + } +#endif + } + +#ifdef CONFIG_XEN + /* Ensure that ballooned highmem pages don't have kmaps. */ + kmap_flush_unused(); + flush_tlb_all(); +#endif + + balloon_lock(flags); + + /* No more mappings: invalidate P2M and add to balloon. */ + for (i = 0; i < nr_pages; i++) { + pfn = mfn_to_pfn(frame_list[i]); + balloon_append(pfn_to_page(pfn), 1); + } + + set_xen_guest_handle(reservation.extent_start, frame_list); + reservation.nr_extents = nr_pages; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != nr_pages); + + bs.current_pages -= nr_pages; + totalram_pages = bs.current_pages; + + balloon_unlock(flags); + + return need_sleep; +} + +/* + * We avoid multiple worker processes conflicting via the balloon mutex. + * We may of course race updates of the target counts (which are protected + * by the balloon lock), or with changes to the Xen hard limit, but we will + * recover from these in time. + */ +static void balloon_process(struct work_struct *unused) +{ + int need_sleep = 0; + long credit; + + mutex_lock(&balloon_mutex); + + do { + credit = current_target() - bs.current_pages; + if (credit > 0) + need_sleep = (increase_reservation(credit) != 0); + if (credit < 0) + need_sleep = (decrease_reservation(-credit) != 0); + +#ifndef CONFIG_PREEMPT + if (need_resched()) + schedule(); +#endif + } while ((credit != 0) && !need_sleep); + + /* Schedule more work if there is some still to be done. */ + if (current_target() != bs.current_pages) + mod_timer(&balloon_timer, jiffies + HZ); + + mutex_unlock(&balloon_mutex); +} + +/* Resets the Xen limit, sets new target, and kicks off processing. */ +void balloon_set_new_target(unsigned long target) +{ + /* No need for lock. Not read-modify-write updates. 
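(Editor's aside, not part of the patch: the new target is merely clamped
against balloon_minimum_target() and the worker is kicked.
balloon_process() then computes credit = target - current_pages and
keeps calling increase_reservation() for positive credit or
decrease_reservation() for negative credit until the credit is spent or
an allocation would have to sleep, re-arming balloon_timer for whatever
remains.)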
*/ + bs.target_pages = max(target, balloon_minimum_target()); + schedule_work(&balloon_worker); +} + +static struct xenbus_watch target_watch = +{ + .node = "memory/target" +}; + +/* React to a change in the target key */ +static void watch_target(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + unsigned long long new_target; + int err; + + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); + if (err != 1) { + /* This is ok (for domain0 at least) - so just return */ + return; + } + + /* The given memory/target value is in KiB, so it needs converting to + * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. + */ + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); +} + +static int balloon_init_watcher(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + int err; + + err = register_xenbus_watch(&target_watch); + if (err) + printk(KERN_ERR "Failed to set balloon watcher\n"); + + return NOTIFY_DONE; +} + +#ifdef CONFIG_PROC_FS +static int balloon_write(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char memstring[64], *endchar; + unsigned long long target_bytes; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 1) + return -EBADMSG; /* runt */ + if (count > sizeof(memstring)) + return -EFBIG; /* too long */ + + if (copy_from_user(memstring, buffer, count)) + return -EFAULT; + memstring[sizeof(memstring)-1] = '\0'; + + target_bytes = memparse(memstring, &endchar); + balloon_set_new_target(target_bytes >> PAGE_SHIFT); + + return count; +} + +static int balloon_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf( + page, + "Current allocation: %8lu kB\n" + "Requested target: %8lu kB\n" + "Minimum target: %8lu kB\n" + "Maximum target: %8lu kB\n" + "Low-mem balloon: %8lu kB\n" + "High-mem balloon: %8lu kB\n" + "Driver pages: %8lu kB\n", + PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages), + PAGES2KB(balloon_minimum_target()), PAGES2KB(num_physpages), + PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high), + PAGES2KB(bs.driver_pages)); + + + *eof = 1; + return len; +} +#endif + +static struct notifier_block xenstore_notifier; + +static int __init balloon_init(void) +{ +#if defined(CONFIG_X86) && defined(CONFIG_XEN) + unsigned long pfn; + struct page *page; +#endif + + if (!is_running_on_xen()) + return -ENODEV; + + IPRINTK("Initialising balloon driver.\n"); + +#ifdef CONFIG_XEN + bs.current_pages = min(xen_start_info->nr_pages, max_pfn); + totalram_pages = bs.current_pages; +#else + bs.current_pages = totalram_pages; +#endif + bs.target_pages = bs.current_pages; + bs.balloon_low = 0; + bs.balloon_high = 0; + bs.driver_pages = 0UL; + + init_timer(&balloon_timer); + balloon_timer.data = 0; + balloon_timer.function = balloon_alarm; + +#ifdef CONFIG_PROC_FS + if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) { + WPRINTK("Unable to create /proc/xen/balloon.\n"); + return -1; + } + + balloon_pde->read_proc = balloon_read; + balloon_pde->write_proc = balloon_write; +#endif + balloon_sysfs_init(); + +#if defined(CONFIG_X86) && defined(CONFIG_XEN) + /* Initialise the balloon with excess memory space. 
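(Editor's aside, not part of the patch, a worked conversion for
watch_target() above: the xenstore memory/target key is in KiB, so with
4 KiB pages the shift is PAGE_SHIFT - 10 = 2, and a target of 1048576
KiB, i.e. 1 GiB, becomes 1048576 >> 2 = 262144 pages; 262144 pages of 4
KiB each is indeed 1 GiB. The loop below performs the complementary
boot-time step, handing every page between the domain's initial
allocation and max_pfn to the balloon.)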
*/ + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + page = pfn_to_page(pfn); + if (!PageReserved(page)) { + SetPageReserved(page); + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + balloon_append(page, 0); + } + } +#endif + + target_watch.callback = watch_target; + xenstore_notifier.notifier_call = balloon_init_watcher; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(balloon_init); + +static void __exit balloon_exit(void) +{ + balloon_sysfs_exit(); + /* XXX - release balloon here */ +} + +module_exit(balloon_exit); + +void balloon_update_driver_allowance(long delta) +{ + unsigned long flags; + + balloon_lock(flags); + bs.driver_pages += delta; + balloon_unlock(flags); +} + +#ifdef CONFIG_XEN +static int dealloc_pte_fn( + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) +{ + unsigned long pfn, mfn = pte_mfn(*pte); + int ret; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &mfn); + set_pte_at(&init_mm, addr, pte, __pte_ma(0)); + pfn = __pa(addr) >> PAGE_SHIFT; + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + SetPageReserved(pfn_to_page(pfn)); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != 1); + return 0; +} +#endif + +struct page **alloc_empty_pages_and_pagevec(int nr_pages) +{ + unsigned long flags; + void *v; + struct page *page, **pagevec; + int i, ret; + + pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); + if (pagevec == NULL) + return NULL; + + for (i = 0; i < nr_pages; i++) { + page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD); + if (page == NULL) + goto err; + + v = page_address(page); + scrub_pages(v, 1); + + balloon_lock(flags); + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long gmfn = page_to_pfn(page); + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gmfn); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + if (ret == 1) + ret = 0; /* success */ + } else { +#ifdef CONFIG_XEN + ret = apply_to_page_range(&init_mm, (unsigned long)v, + PAGE_SIZE, dealloc_pte_fn, + NULL); +#else + /* Cannot handle non-auto translate mode. 
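(Editor's aside, not part of the patch: an "empty" page here is a struct
page the kernel still owns whose backing machine frame has been handed
back to Xen, via dealloc_pte_fn() clearing the pte and P2M entry on
paravirt, or a plain XENMEM_decrease_reservation on auto-translated
guests. That is precisely what grant mapping wants, a kernel address
with no frame behind it, and it is why blktap2's request pool allocates
each bucket's foreign_pages with alloc_empty_pages_and_pagevec().)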
*/ + ret = 1; +#endif + } + + if (ret != 0) { + balloon_unlock(flags); + balloon_free_page(page); + goto err; + } + + totalram_pages = --bs.current_pages; + if (PageHighMem(page)) + dec_totalhigh_pages(); + page_zone(page)->present_pages--; + + balloon_unlock(flags); + } + + out: + schedule_work(&balloon_worker); +#ifdef CONFIG_XEN + flush_tlb_all(); +#endif + return pagevec; + + err: + balloon_lock(flags); + while (--i >= 0) + balloon_append(pagevec[i], 0); + balloon_unlock(flags); + kfree(pagevec); + pagevec = NULL; + goto out; +} + +static void _free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages, + int free_vec) +{ + unsigned long flags; + int i; + + if (pagevec == NULL) + return; + + balloon_lock(flags); + for (i = 0; i < nr_pages; i++) { + BUG_ON(page_count(pagevec[i]) != 1); + balloon_append(pagevec[i], !free_vec); + } + if (!free_vec) + totalram_pages = bs.current_pages -= nr_pages; + balloon_unlock(flags); + + if (free_vec) + kfree(pagevec); + + schedule_work(&balloon_worker); +} + +void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) +{ + _free_empty_pages_and_pagevec(pagevec, nr_pages, 1); +} + +void free_empty_pages(struct page **pagevec, int nr_pages) +{ + _free_empty_pages_and_pagevec(pagevec, nr_pages, 0); +} + +void balloon_release_driver_page(struct page *page) +{ + unsigned long flags; + + balloon_lock(flags); + balloon_append(page, 1); + bs.driver_pages--; + balloon_unlock(flags); + + schedule_work(&balloon_worker); +} + +EXPORT_SYMBOL_GPL(balloon_update_driver_allowance); +EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec); +EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec); +EXPORT_SYMBOL_GPL(balloon_release_driver_page); + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/balloon/sysfs.c +++ linux-ec2-2.6.31/drivers/xen/balloon/sysfs.c @@ -0,0 +1,204 @@ +/****************************************************************************** + * balloon/sysfs.c + * + * Xen balloon driver - sysfs interfaces. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
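A note on the symbols just exported by balloon.c: other backends borrow empty, grant-mappable pages through this API, and usbback_init() later in this patch does essentially the following (sketch, error handling trimmed):

/* Sketch: a backend grabs nr_pages empty pages from the balloon on
 * init and returns them on teardown. */
static struct page **pool;

static int borrow_pages(int nr_pages)
{
	pool = alloc_empty_pages_and_pagevec(nr_pages);
	return pool ? 0 : -ENOMEM;
}

static void return_pages(int nr_pages)
{
	free_empty_pages_and_pagevec(pool, nr_pages);	/* NULL-safe */
}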
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#define BALLOON_CLASS_NAME "xen_memory" + +#define BALLOON_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) + +BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages)); +BALLOON_SHOW(min_kb, "%lu\n", PAGES2KB(balloon_minimum_target())); +BALLOON_SHOW(max_kb, "%lu\n", PAGES2KB(num_physpages)); +BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low)); +BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high)); +BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages)); + +static ssize_t show_target_kb(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages)); +} + +static ssize_t store_target_kb(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) +{ + char *endchar; + unsigned long long target_bytes; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 1) + return -EBADMSG; /* runt */ + + target_bytes = simple_strtoull(buf, &endchar, 0) << 10; + balloon_set_new_target(target_bytes >> PAGE_SHIFT); + + return count; +} + +static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, + show_target_kb, store_target_kb); + +static ssize_t show_target(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + return sprintf(buf, "%llu\n", + (unsigned long long)balloon_stats.target_pages + << PAGE_SHIFT); +} + +static ssize_t store_target(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + char *endchar; + unsigned long long target_bytes; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 1) + return -EBADMSG; /* runt */ + + target_bytes = memparse(buf, &endchar); + balloon_set_new_target(target_bytes >> PAGE_SHIFT); + + return count; +} + +static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, + show_target, store_target); + +static struct sysdev_attribute *balloon_attrs[] = { + &attr_target_kb, + &attr_target, +}; + +static struct attribute *balloon_info_attrs[] = { + &attr_current_kb.attr, + &attr_min_kb.attr, + &attr_max_kb.attr, + &attr_low_kb.attr, + &attr_high_kb.attr, + &attr_driver_kb.attr, + NULL +}; + +static struct attribute_group balloon_info_group = { + .name = "info", + .attrs = balloon_info_attrs, +}; + +static struct sysdev_class balloon_sysdev_class = { + .name = BALLOON_CLASS_NAME, +}; + +static struct sys_device balloon_sysdev; + +static int __init register_balloon(struct sys_device *sysdev) +{ + int i, error; + + error = sysdev_class_register(&balloon_sysdev_class); + if (error) + return error; + + sysdev->id = 0; + sysdev->cls = &balloon_sysdev_class; + + error = sysdev_register(sysdev); + if (error) { + sysdev_class_unregister(&balloon_sysdev_class); + return error; + } + + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { + error = sysdev_create_file(sysdev, balloon_attrs[i]); + if (error) + goto fail; + } + + error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); + if (error) + goto fail; + + return 0; + + fail: + while (--i >= 0) + sysdev_remove_file(sysdev, balloon_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&balloon_sysdev_class); + return error; +} + +static __exit void unregister_balloon(struct sys_device 
*sysdev) +{ + int i; + + sysfs_remove_group(&sysdev->kobj, &balloon_info_group); + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) + sysdev_remove_file(sysdev, balloon_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&balloon_sysdev_class); +} + +int __init balloon_sysfs_init(void) +{ + return register_balloon(&balloon_sysdev); +} + +void __exit balloon_sysfs_exit(void) +{ + unregister_balloon(&balloon_sysdev); +} --- linux-ec2-2.6.31.orig/drivers/xen/balloon/Makefile +++ linux-ec2-2.6.31/drivers/xen/balloon/Makefile @@ -0,0 +1,2 @@ + +obj-y := balloon.o sysfs.o --- linux-ec2-2.6.31.orig/drivers/xen/balloon/common.h +++ linux-ec2-2.6.31/drivers/xen/balloon/common.h @@ -0,0 +1,57 @@ +/****************************************************************************** + * balloon/common.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __XEN_BALLOON_COMMON_H__ +#define __XEN_BALLOON_COMMON_H__ + +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) + +struct balloon_stats { + /* We aim for 'current allocation' == 'target allocation'. */ + unsigned long current_pages; + unsigned long target_pages; + /* + * Drivers may alter the memory reservation independently, but they + * must inform the balloon driver so we avoid hitting the hard limit. + */ + unsigned long driver_pages; + /* Number of pages in high- and low-memory balloons. */ + unsigned long balloon_low; + unsigned long balloon_high; +}; + +extern struct balloon_stats balloon_stats; +#define bs balloon_stats + +int balloon_sysfs_init(void); +void balloon_sysfs_exit(void); + +void balloon_set_new_target(unsigned long target); +unsigned long balloon_minimum_target(void); + +#endif /* __XEN_BALLOON_COMMON_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/usbback/interface.c +++ linux-ec2-2.6.31/drivers/xen/usbback/interface.c @@ -0,0 +1,208 @@ +/* + * interface.c + * + * Xen USB backend interface management. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. 
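With the sysdev registered above, a dom0 tool can retarget the balloon through sysfs. A userspace sketch, assuming the conventional path for sysdev class "xen_memory", id 0 (the path is an assumption, not stated in this hunk):

#include <stdio.h>

int main(void)
{
	/* assumed path: class "xen_memory", id 0, attribute target_kb */
	FILE *f = fopen("/sys/devices/system/xen_memory/xen_memory0/target_kb", "w");

	if (!f) {
		perror("target_kb");
		return 1;
	}
	fprintf(f, "%lu\n", 524288UL);	/* balloon the domain to 512 MiB */
	return fclose(f) ? 1 : 0;
}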
+ * Author: Noboru Iwamatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * or, by your choice, + * + * When distributed separately from the Linux kernel or incorporated into + * other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "usbback.h" + +static LIST_HEAD(usbif_list); +static DEFINE_SPINLOCK(usbif_list_lock); + +usbif_t *find_usbif(int dom_id, int dev_id) +{ + usbif_t *usbif; + int found = 0; + unsigned long flags; + + spin_lock_irqsave(&usbif_list_lock, flags); + list_for_each_entry(usbif, &usbif_list, usbif_list) { + if (usbif->domid == dom_id + && usbif->handle == dev_id) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&usbif_list_lock, flags); + + if (found) + return usbif; + + return NULL; +} + +usbif_t *usbif_alloc(domid_t domid, unsigned int handle) +{ + usbif_t *usbif; + unsigned long flags; + int i; + + usbif = kzalloc(sizeof(usbif_t), GFP_KERNEL); + if (!usbif) + return NULL; + + usbif->domid = domid; + usbif->handle = handle; + spin_lock_init(&usbif->ring_lock); + atomic_set(&usbif->refcnt, 0); + init_waitqueue_head(&usbif->wq); + init_waitqueue_head(&usbif->waiting_to_free); + spin_lock_init(&usbif->plug_lock); + INIT_LIST_HEAD(&usbif->plugged_devices); + spin_lock_init(&usbif->addr_lock); + for (i = 0; i < USB_DEV_ADDR_SIZE; i++) { + usbif->addr_table[i] = NULL; + } + + spin_lock_irqsave(&usbif_list_lock, flags); + list_add(&usbif->usbif_list, &usbif_list); + spin_unlock_irqrestore(&usbif_list_lock, flags); + + return usbif; +} + +static int map_frontend_page(usbif_t *usbif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)usbif->ring_area->addr, + GNTMAP_host_map, shared_page, usbif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + printk(KERN_ERR "grant table operation failure\n"); + return op.status; + } + + usbif->shmem_ref = shared_page; + usbif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(usbif_t *usbif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)usbif->ring_area->addr, + GNTMAP_host_map, usbif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int usbif_map(usbif_t *usbif, unsigned long shared_page, unsigned int evtchn) +{ + int err; + usbif_sring_t *sring; + + if (usbif->irq) + return 0; + + if ((usbif->ring_area = alloc_vm_area(PAGE_SIZE)) == NULL) + return -ENOMEM; + + err = map_frontend_page(usbif, shared_page); + if (err) { + free_vm_area(usbif->ring_area); + return err; + } + + sring = (usbif_sring_t *) usbif->ring_area->addr; + BACK_RING_INIT(&usbif->ring, sring, PAGE_SIZE); + + err = bind_interdomain_evtchn_to_irqhandler( + usbif->domid, evtchn, usbbk_be_int, 0, "usbif-backend", usbif); + if (err < 0) + { + unmap_frontend_page(usbif); + free_vm_area(usbif->ring_area); + usbif->ring.sring = NULL; + return err; + } + usbif->irq = err; + + return 0; +} + +void usbif_disconnect(usbif_t *usbif) +{ + struct usbstub *stub, *tmp; + unsigned long flags; + + if (usbif->xenusbd) { + kthread_stop(usbif->xenusbd); + usbif->xenusbd = NULL; + } + + spin_lock_irqsave(&usbif->plug_lock, flags); + list_for_each_entry_safe(stub, tmp, &usbif->plugged_devices, plugged_list) { + usbbk_unlink_urbs(stub); + detach_device_without_lock(usbif, stub); + } + spin_unlock_irqrestore(&usbif->plug_lock, flags); + + wait_event(usbif->waiting_to_free, atomic_read(&usbif->refcnt) == 0); + + if (usbif->irq) { + unbind_from_irqhandler(usbif->irq, usbif); + usbif->irq = 0; + } + + if (usbif->ring.sring) { + unmap_frontend_page(usbif); + free_vm_area(usbif->ring_area); + usbif->ring.sring = NULL; + } +} + +void usbif_free(usbif_t *usbif) +{ + unsigned long 
flags; + + spin_lock_irqsave(&usbif_list_lock, flags); + list_del(&usbif->usbif_list); + spin_unlock_irqrestore(&usbif_list_lock, flags); + kfree(usbif); +} --- linux-ec2-2.6.31.orig/drivers/xen/usbback/xenbus.c +++ linux-ec2-2.6.31/drivers/xen/usbback/xenbus.c @@ -0,0 +1,268 @@ +/* + * xenbus.c + * + * Xenbus interface for USB backend driver. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * or, by your choice, + * + * When distributed separately from the Linux kernel or incorporated into + * other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
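usbif_disconnect() above blocks in wait_event() until the reference count drains. The usbif_get()/usbif_put() helpers are defined in usbback.h, which is outside this hunk; presumably they follow the usual Xen backend pattern, roughly:

/* Sketch only: conventional Xen-backend refcounting; the real
 * definitions live in usbback.h and may differ. */
#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
#define usbif_put(_b)						\
	do {							\
		if (atomic_dec_and_test(&(_b)->refcnt))		\
			wake_up(&(_b)->waiting_to_free);	\
	} while (0)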
+ */
+
+#include
+#include "usbback.h"
+
+static int start_xenusbd(usbif_t *usbif)
+{
+	int err = 0;
+	char name[TASK_COMM_LEN];
+
+	snprintf(name, TASK_COMM_LEN, "usbback.%d.%d",
+		 usbif->domid, usbif->handle);
+	usbif->xenusbd = kthread_run(usbbk_schedule, usbif, name);
+	if (IS_ERR(usbif->xenusbd)) {
+		err = PTR_ERR(usbif->xenusbd);
+		usbif->xenusbd = NULL;
+		xenbus_dev_error(usbif->xbdev, err, "start xenusbd");
+	}
+	return err;
+}
+
+static int usbback_remove(struct xenbus_device *dev)
+{
+	usbif_t *usbif = dev_get_drvdata(&dev->dev);
+
+	if (usbif) {
+		usbif_disconnect(usbif);
+		usbif_free(usbif);
+	}
+	dev_set_drvdata(&dev->dev, NULL);
+
+	return 0;
+}
+
+static int usbback_probe(struct xenbus_device *dev,
+			 const struct xenbus_device_id *id)
+{
+	usbif_t *usbif;
+	unsigned int handle;
+	int err;
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	handle = simple_strtoul(strrchr(dev->otherend, '/') + 1, NULL, 0);
+	usbif = usbif_alloc(dev->otherend_id, handle);
+	if (!usbif) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating backend interface");
+		return -ENOMEM;
+	}
+	usbif->xbdev = dev;
+	dev_set_drvdata(&dev->dev, usbif);
+
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	usbback_remove(dev);
+	return err;
+}
+
+static int connect_ring(usbif_t *usbif)
+{
+	struct xenbus_device *dev = usbif->xbdev;
+	unsigned long ring_ref;
+	unsigned int evtchn;
+	int err;
+
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "ring-ref", "%lu", &ring_ref,
+			    "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 dev->otherend);
+		return err;
+	}
+
+	printk(KERN_INFO "usbback: ring-ref %lu, event-channel %u\n",
+	       ring_ref, evtchn);
+
+	err = usbif_map(usbif, ring_ref, evtchn);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
+				 ring_ref, evtchn);
+		return err;
+	}
+
+	return 0;
+}
+
+void usbback_do_hotplug(usbif_t *usbif)
+{
+	struct xenbus_transaction xbt;
+	struct xenbus_device *dev = usbif->xbdev;
+	struct usbstub *stub = NULL;
+	int err;
+	char port_str[8];
+	int i;
+	int num_ports;
+	int state;
+
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "starting transaction");
+		return;
+	}
+
+	err = xenbus_scanf(xbt, dev->nodename,
+			   "num-ports", "%d", &num_ports);
+	if (err != 1) {
+		xenbus_dev_fatal(dev, err, "reading num-ports");
+		goto abort;
+	}
+
+	for (i = 1; i <= num_ports; i++) {
+		stub = find_attached_device(usbif, i);
+		if (stub)
+			state = stub->udev->speed;
+		else
+			state = 0;
+		sprintf(port_str, "port-%d", i);
+		err = xenbus_printf(xbt, dev->nodename, port_str,
+				    "%d", state);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "writing port-%d state", i);
+			goto abort;
+		}
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+	if (err)
+		xenbus_dev_fatal(dev, err, "completing transaction");
+
+	return;
+
+abort:
+	xenbus_transaction_end(xbt, 1);
+}
+
+void usbback_reconfigure(usbif_t *usbif)
+{
+	struct xenbus_device *dev = usbif->xbdev;
+
+	if (dev->state == XenbusStateConnected)
+		xenbus_switch_state(dev, XenbusStateReconfiguring);
+}
+
+void frontend_changed(struct xenbus_device *dev,
+		      enum xenbus_state frontend_state)
+{
+	usbif_t *usbif = dev_get_drvdata(&dev->dev);
+	int err;
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:
+		if (dev->state == XenbusStateClosed) {
+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+			       __FUNCTION__, dev->nodename);
+			xenbus_switch_state(dev, XenbusStateInitWait);
+		}
+		break;
+
+	case XenbusStateInitialised:
+		err = connect_ring(usbif);
+		if (err)
+
break; + start_xenusbd(usbif); + usbback_do_hotplug(usbif); + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateConnected: + if (dev->state == XenbusStateConnected) + break; + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosing: + usbif_disconnect(usbif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + break; + + case XenbusStateReconfiguring: + usbback_do_hotplug(usbif); + xenbus_switch_state(dev, XenbusStateReconfigured); + break; + + case XenbusStateUnknown: + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + +static const struct xenbus_device_id usbback_ids[] = { + { "vusb" }, + { "" }, +}; + +static struct xenbus_driver usbback_driver = { + .name = "vusb", + .ids = usbback_ids, + .probe = usbback_probe, + .otherend_changed = frontend_changed, + .remove = usbback_remove, +}; + +int usbback_xenbus_init(void) +{ + return xenbus_register_backend(&usbback_driver); +} + +void usbback_xenbus_exit(void) +{ + xenbus_unregister_driver(&usbback_driver); +} --- linux-ec2-2.6.31.orig/drivers/xen/usbback/usbstub.c +++ linux-ec2-2.6.31/drivers/xen/usbback/usbstub.c @@ -0,0 +1,447 @@ +/* + * usbstub.c + * + * USB stub driver - grabbing and managing USB devices. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * or, by your choice, + * + * When distributed separately from the Linux kernel or incorporated into + * other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
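connect_ring() earlier in this file expects the frontend to have published two xenstore keys; the peer side would write them roughly like this (sketch, the frontend driver is not part of this patch):

/* Sketch (frontend side): publish the shared ring and event channel
 * that connect_ring() gathers on the backend. */
err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%lu", ring_ref);
if (!err)
	err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", evtchn);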
+ */ + +#include "usbback.h" + +static LIST_HEAD(usbstub_ids); +static DEFINE_SPINLOCK(usbstub_ids_lock); +static LIST_HEAD(grabbed_devices); +static DEFINE_SPINLOCK(grabbed_devices_lock); + +struct usbstub *find_grabbed_device(int dom_id, int dev_id, int portnum) +{ + struct usbstub *stub; + int found = 0; + unsigned long flags; + + spin_lock_irqsave(&grabbed_devices_lock, flags); + list_for_each_entry(stub, &grabbed_devices, grabbed_list) { + if (stub->id->dom_id == dom_id + && stub->id->dev_id == dev_id + && stub->id->portnum == portnum) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&grabbed_devices_lock, flags); + + if (found) + return stub; + + return NULL; +} + +static struct usbstub *usbstub_alloc(struct usb_interface *interface, + struct usbstub_id *stub_id) +{ + struct usbstub *stub; + + stub = kzalloc(sizeof(*stub), GFP_KERNEL); + if (!stub) { + printk(KERN_ERR "no memory for alloc usbstub\n"); + return NULL; + } + + stub->udev = usb_get_dev(interface_to_usbdev(interface)); + stub->interface = interface; + stub->id = stub_id; + spin_lock_init(&stub->submitting_lock); + INIT_LIST_HEAD(&stub->submitting_list); + + return stub; +} + +static int usbstub_free(struct usbstub *stub) +{ + if (!stub) + return -EINVAL; + + usb_put_dev(stub->udev); + stub->interface = NULL; + stub->udev = NULL; + stub->id = NULL; + kfree(stub); + + return 0; +} + +static int usbstub_match_one(struct usb_interface *interface, + struct usbstub_id *stub_id) +{ + const char *udev_busid = dev_name(interface->dev.parent); + + if (!(strncmp(stub_id->bus_id, udev_busid, USBBACK_BUS_ID_SIZE))) { + return 1; + } + + return 0; +} + +static struct usbstub_id *usbstub_match(struct usb_interface *interface) +{ + struct usb_device *udev = interface_to_usbdev(interface); + struct usbstub_id *stub_id; + unsigned long flags; + int found = 0; + + /* hub currently not supported, so skip. 
*/ + if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) + return NULL; + + spin_lock_irqsave(&usbstub_ids_lock, flags); + list_for_each_entry(stub_id, &usbstub_ids, id_list) { + if (usbstub_match_one(interface, stub_id)) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&usbstub_ids_lock, flags); + + if (found) + return stub_id; + + return NULL; +} + +static void add_to_grabbed_devices(struct usbstub *stub) +{ + unsigned long flags; + + spin_lock_irqsave(&grabbed_devices_lock, flags); + list_add(&stub->grabbed_list, &grabbed_devices); + spin_unlock_irqrestore(&grabbed_devices_lock, flags); +} + +static void remove_from_grabbed_devices(struct usbstub *stub) +{ + unsigned long flags; + + spin_lock_irqsave(&grabbed_devices_lock, flags); + list_del(&stub->grabbed_list); + spin_unlock_irqrestore(&grabbed_devices_lock, flags); +} + +static int usbstub_probe(struct usb_interface *interface, + const struct usb_device_id *id) +{ + struct usbstub_id *stub_id = NULL; + struct usbstub *stub = NULL; + usbif_t *usbif = NULL; + int retval = 0; + + if ((stub_id = usbstub_match(interface))) { + stub = usbstub_alloc(interface, stub_id); + if (!stub) + return -ENOMEM; + + usb_set_intfdata(interface, stub); + add_to_grabbed_devices(stub); + usbif = find_usbif(stub_id->dom_id, stub_id->dev_id); + if (usbif) { + usbbk_plug_device(usbif, stub); + usbback_reconfigure(usbif); + } + + } else + retval = -ENODEV; + + return retval; +} + +static void usbstub_disconnect(struct usb_interface *interface) +{ + struct usbstub *stub + = (struct usbstub *) usb_get_intfdata(interface); + + usb_set_intfdata(interface, NULL); + + if (!stub) + return; + + if (stub->usbif) { + usbback_reconfigure(stub->usbif); + usbbk_unplug_device(stub->usbif, stub); + } + + usbbk_unlink_urbs(stub); + + remove_from_grabbed_devices(stub); + + usbstub_free(stub); + + return; +} + +static inline int str_to_vport(const char *buf, + char *phys_bus, + int *dom_id, + int *dev_id, + int *port) +{ + char *p; + int len; + int err; + + /* no physical bus */ + if (!(p = strchr(buf, ':'))) + return -EINVAL; + + len = p - buf; + + /* bad physical bus */ + if (len + 1 > USBBACK_BUS_ID_SIZE) + return -EINVAL; + + strlcpy(phys_bus, buf, len + 1); + err = sscanf(p + 1, "%d:%d:%d", dom_id, dev_id, port); + if (err == 3) + return 0; + else + return -EINVAL; +} + +static int usbstub_id_add(const char *bus_id, + const int dom_id, + const int dev_id, + const int portnum) +{ + struct usbstub_id *stub_id; + unsigned long flags; + + stub_id = kzalloc(sizeof(*stub_id), GFP_KERNEL); + if (!stub_id) + return -ENOMEM; + + stub_id->dom_id = dom_id; + stub_id->dev_id = dev_id; + stub_id->portnum = portnum; + + strncpy(stub_id->bus_id, bus_id, USBBACK_BUS_ID_SIZE); + + spin_lock_irqsave(&usbstub_ids_lock, flags); + list_add(&stub_id->id_list, &usbstub_ids); + spin_unlock_irqrestore(&usbstub_ids_lock, flags); + + return 0; +} + +static int usbstub_id_remove(const char *phys_bus, + const int dom_id, + const int dev_id, + const int portnum) +{ + struct usbstub_id *stub_id, *tmp; + int err = -ENOENT; + unsigned long flags; + + spin_lock_irqsave(&usbstub_ids_lock, flags); + list_for_each_entry_safe(stub_id, tmp, &usbstub_ids, id_list) { + if (stub_id->dom_id == dom_id + && stub_id->dev_id == dev_id + && stub_id->portnum == portnum) { + list_del(&stub_id->id_list); + kfree(stub_id); + + err = 0; + } + } + spin_unlock_irqrestore(&usbstub_ids_lock, flags); + + return err; +} + +static ssize_t usbstub_vport_add(struct device_driver *driver, + const char *buf, size_t count) +{ + 
int err = 0; + + char bus_id[USBBACK_BUS_ID_SIZE]; + int dom_id; + int dev_id; + int portnum; + + err = str_to_vport(buf, &bus_id[0], &dom_id, &dev_id, &portnum); + if (err) + goto out; + + err = usbstub_id_add(&bus_id[0], dom_id, dev_id, portnum); + +out: + if (!err) + err = count; + return err; +} + +DRIVER_ATTR(new_vport, S_IWUSR, NULL, usbstub_vport_add); + +static ssize_t usbstub_vport_remove(struct device_driver *driver, + const char *buf, size_t count) +{ + int err = 0; + + char bus_id[USBBACK_BUS_ID_SIZE]; + int dom_id; + int dev_id; + int portnum; + + err = str_to_vport(buf, &bus_id[0], &dom_id, &dev_id, &portnum); + if (err) + goto out; + + err = usbstub_id_remove(&bus_id[0], dom_id, dev_id, portnum); + +out: + if (!err) + err = count; + return err; +} + +DRIVER_ATTR(remove_vport, S_IWUSR, NULL, usbstub_vport_remove); + +static ssize_t usbstub_vport_show(struct device_driver *driver, + char *buf) +{ + struct usbstub_id *stub_id; + size_t count = 0; + unsigned long flags; + + spin_lock_irqsave(&usbstub_ids_lock, flags); + list_for_each_entry(stub_id, &usbstub_ids, id_list) { + if (count >= PAGE_SIZE) + break; + count += scnprintf((char *)buf + count, PAGE_SIZE - count, + "%s:%d:%d:%d\n", + &stub_id->bus_id[0], + stub_id->dom_id, + stub_id->dev_id, + stub_id->portnum); + } + spin_unlock_irqrestore(&usbstub_ids_lock, flags); + + return count; +} + +DRIVER_ATTR(vports, S_IRUSR, usbstub_vport_show, NULL); + +static ssize_t usbstub_devices_show(struct device_driver *driver, + char *buf) +{ + struct usbstub *stub; + size_t count = 0; + unsigned long flags; + + spin_lock_irqsave(&grabbed_devices_lock, flags); + list_for_each_entry(stub, &grabbed_devices, grabbed_list) { + if (count >= PAGE_SIZE) + break; + + count += scnprintf((char *)buf + count, PAGE_SIZE - count, + "%u-%s:%u.%u\n", + stub->udev->bus->busnum, + stub->udev->devpath, + stub->udev->config->desc.bConfigurationValue, + stub->interface->cur_altsetting->desc.bInterfaceNumber); + + } + spin_unlock_irqrestore(&grabbed_devices_lock, flags); + + return count; +} + +DRIVER_ATTR(grabbed_devices, S_IRUSR, usbstub_devices_show, NULL); + +/* table of devices that matches any usbdevice */ +static const struct usb_device_id usbstub_table[] = { + { .driver_info = 1 }, /* wildcard, see usb_match_id() */ + { } /* Terminating entry */ +}; +MODULE_DEVICE_TABLE(usb, usbstub_table); + +static struct usb_driver usbback_usb_driver = { + .name = "usbback", + .probe = usbstub_probe, + .disconnect = usbstub_disconnect, + .id_table = usbstub_table, +}; + +int __init usbstub_init(void) +{ + int err; + + err = usb_register(&usbback_usb_driver); + if (err < 0) + goto out; + if (!err) + err = driver_create_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_new_vport); + if (!err) + err = driver_create_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_remove_vport); + if (!err) + err = driver_create_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_vports); + if (!err) + err = driver_create_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_grabbed_devices); + if (err) + usbstub_exit(); + +out: + return err; +} + +void usbstub_exit(void) +{ + driver_remove_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_new_vport); + driver_remove_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_remove_vport); + driver_remove_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_vports); + driver_remove_file(&usbback_usb_driver.drvwrap.driver, + &driver_attr_grabbed_devices); + + usb_deregister(&usbback_usb_driver); +} --- 
linux-ec2-2.6.31.orig/drivers/xen/usbback/usbback.c +++ linux-ec2-2.6.31/drivers/xen/usbback/usbback.c @@ -0,0 +1,1095 @@ +/* + * usbback.c + * + * Xen USB backend driver + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * or, by your choice, + * + * When distributed separately from the Linux kernel or incorporated into + * other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
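Before the usbback.c body: the new_vport/remove_vport attributes defined in usbstub.c above take a "busid:domid:devid:port" string (e.g. echo 1-1:0:1:2 > new_vport). A runnable userspace sketch of the same parse that str_to_vport() performs (BUS_ID_SIZE is a stand-in for USBBACK_BUS_ID_SIZE):

#include <stdio.h>
#include <string.h>

#define BUS_ID_SIZE 20	/* assumed; the driver uses USBBACK_BUS_ID_SIZE */

int main(void)
{
	const char *spec = "1-1:0:1:2";	/* hypothetical physical bus 1-1 */
	char bus[BUS_ID_SIZE];
	int dom, dev, port;
	const char *p = strchr(spec, ':');	/* split off the bus id */

	if (!p || (size_t)(p - spec) >= sizeof(bus))
		return 1;
	memcpy(bus, spec, p - spec);
	bus[p - spec] = '\0';
	if (sscanf(p + 1, "%d:%d:%d", &dom, &dev, &port) != 3)
		return 1;
	printf("bus %s -> dom %d, vdev %d, port %d\n", bus, dom, dev, port);
	return 0;
}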
+ */ + +#include +#include +#include "usbback.h" + +#if 0 +#include "../../usb/core/hub.h" +#endif + +int usbif_reqs = USBIF_BACK_MAX_PENDING_REQS; +module_param_named(reqs, usbif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of usbback requests to allocate"); + +struct pending_req_segment { + uint16_t offset; + uint16_t length; +}; + +typedef struct { + usbif_t *usbif; + + uint16_t id; /* request id */ + + struct usbstub *stub; + struct list_head urb_list; + + /* urb */ + struct urb *urb; + void *buffer; + dma_addr_t transfer_dma; + struct usb_ctrlrequest *setup; + dma_addr_t setup_dma; + + /* request segments */ + uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ + uint16_t nr_extra_segs; /* number of iso_frame_desc segments (ISO) */ + struct pending_req_segment *seg; + + struct list_head free_list; +} pending_req_t; + +static pending_req_t *pending_reqs; +static struct list_head pending_free; +static DEFINE_SPINLOCK(pending_free_lock); +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); + +#define USBBACK_INVALID_HANDLE (~0) + +static struct page **pending_pages; +static grant_handle_t *pending_grant_handles; + +static inline int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * USBIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +static inline unsigned long vaddr(pending_req_t *req, int seg) +{ + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + +static pending_req_t* alloc_req(void) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&pending_free_lock, flags); + return req; +} + +static void free_req(pending_req_t *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + spin_unlock_irqrestore(&pending_free_lock, flags); + if (was_empty) + wake_up(&pending_free_wq); +} + +static inline void add_req_to_submitting_list(struct usbstub *stub, pending_req_t *pending_req) +{ + unsigned long flags; + + spin_lock_irqsave(&stub->submitting_lock, flags); + list_add_tail(&pending_req->urb_list, &stub->submitting_list); + spin_unlock_irqrestore(&stub->submitting_lock, flags); +} + +static inline void remove_req_from_submitting_list(struct usbstub *stub, pending_req_t *pending_req) +{ + unsigned long flags; + + spin_lock_irqsave(&stub->submitting_lock, flags); + list_del_init(&pending_req->urb_list); + spin_unlock_irqrestore(&stub->submitting_lock, flags); +} + +void usbbk_unlink_urbs(struct usbstub *stub) +{ + pending_req_t *req, *tmp; + unsigned long flags; + + spin_lock_irqsave(&stub->submitting_lock, flags); + list_for_each_entry_safe(req, tmp, &stub->submitting_list, urb_list) { + usb_unlink_urb(req->urb); + } + spin_unlock_irqrestore(&stub->submitting_lock, flags); +} + +static void fast_flush_area(pending_req_t *pending_req) +{ + struct gnttab_unmap_grant_ref unmap[USBIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, nr_segs, invcount = 0; + grant_handle_t handle; + int ret; + + nr_segs = pending_req->nr_buffer_segs + pending_req->nr_extra_segs; + + if (nr_segs) { + for (i = 0; i < nr_segs; i++) { + handle = pending_handle(pending_req, i); + if (handle == 
USBBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(pending_req, i), + GNTMAP_host_map, handle); + pending_handle(pending_req, i) = USBBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + + kfree(pending_req->seg); + } + + return; +} + +static void copy_buff_to_pages(void *buff, pending_req_t *pending_req, + int start, int nr_pages) +{ + unsigned long copied = 0; + int i; + + for (i = start; i < start + nr_pages; i++) { + memcpy((void *) vaddr(pending_req, i) + pending_req->seg[i].offset, + buff + copied, + pending_req->seg[i].length); + copied += pending_req->seg[i].length; + } +} + +static void copy_pages_to_buff(void *buff, pending_req_t *pending_req, + int start, int nr_pages) +{ + unsigned long copied = 0; + int i; + + for (i = start; i < start + nr_pages; i++) { + memcpy(buff + copied, + (void *) vaddr(pending_req, i) + pending_req->seg[i].offset, + pending_req->seg[i].length); + copied += pending_req->seg[i].length; + } +} + +static int usbbk_alloc_urb(usbif_request_t *req, pending_req_t *pending_req) +{ + int ret; + + if (usb_pipeisoc(req->pipe)) + pending_req->urb = usb_alloc_urb(req->u.isoc.number_of_packets, GFP_KERNEL); + else + pending_req->urb = usb_alloc_urb(0, GFP_KERNEL); + if (!pending_req->urb) { + printk(KERN_ERR "usbback: can't alloc urb\n"); + ret = -ENOMEM; + goto fail; + } + + if (req->buffer_length) { + pending_req->buffer = usb_buffer_alloc(pending_req->stub->udev, + req->buffer_length, GFP_KERNEL, + &pending_req->transfer_dma); + if (!pending_req->buffer) { + printk(KERN_ERR "usbback: can't alloc urb buffer\n"); + ret = -ENOMEM; + goto fail_free_urb; + } + } + + if (usb_pipecontrol(req->pipe)) { + pending_req->setup = usb_buffer_alloc(pending_req->stub->udev, + sizeof(struct usb_ctrlrequest), GFP_KERNEL, + &pending_req->setup_dma); + if (!pending_req->setup) { + printk(KERN_ERR "usbback: can't alloc usb_ctrlrequest\n"); + ret = -ENOMEM; + goto fail_free_buffer; + } + } + + return 0; + +fail_free_buffer: + if (req->buffer_length) + usb_buffer_free(pending_req->stub->udev, req->buffer_length, + pending_req->buffer, pending_req->transfer_dma); +fail_free_urb: + usb_free_urb(pending_req->urb); +fail: + return ret; +} + +static void usbbk_free_urb(struct urb *urb) +{ + if (usb_pipecontrol(urb->pipe)) + usb_buffer_free(urb->dev, sizeof(struct usb_ctrlrequest), + urb->setup_packet, urb->setup_dma); + if (urb->transfer_buffer_length) + usb_buffer_free(urb->dev, urb->transfer_buffer_length, + urb->transfer_buffer, urb->transfer_dma); + barrier(); + usb_free_urb(urb); +} + +static void usbbk_notify_work(usbif_t *usbif) +{ + usbif->waiting_reqs = 1; + wake_up(&usbif->wq); +} + +irqreturn_t usbbk_be_int(int irq, void *dev_id) +{ + usbbk_notify_work(dev_id); + return IRQ_HANDLED; +} + +static void usbbk_do_response(pending_req_t *pending_req, int32_t status, + int32_t actual_length, int32_t error_count, uint16_t start_frame) +{ + usbif_t *usbif = pending_req->usbif; + usbif_response_t *ring_res; + unsigned long flags; + int notify; + + spin_lock_irqsave(&usbif->ring_lock, flags); + ring_res = RING_GET_RESPONSE(&usbif->ring, usbif->ring.rsp_prod_pvt); + ring_res->id = pending_req->id; + ring_res->status = status; + ring_res->actual_length = actual_length; + ring_res->error_count = error_count; + ring_res->start_frame = start_frame; + usbif->ring.rsp_prod_pvt++; + barrier(); + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&usbif->ring, notify); + 
spin_unlock_irqrestore(&usbif->ring_lock, flags); + + if (notify) + notify_remote_via_irq(usbif->irq); +} + +static void usbbk_urb_complete(struct urb *urb) +{ + pending_req_t *pending_req = (pending_req_t *)urb->context; + + if (usb_pipein(urb->pipe) && urb->status == 0 && urb->actual_length > 0) + copy_buff_to_pages(pending_req->buffer, pending_req, + 0, pending_req->nr_buffer_segs); + + if (usb_pipeisoc(urb->pipe)) + copy_buff_to_pages(&urb->iso_frame_desc[0], pending_req, + pending_req->nr_buffer_segs, pending_req->nr_extra_segs); + + barrier(); + + fast_flush_area(pending_req); + + usbbk_do_response(pending_req, urb->status, urb->actual_length, + urb->error_count, urb->start_frame); + + remove_req_from_submitting_list(pending_req->stub, pending_req); + + barrier(); + usbbk_free_urb(urb); + usbif_put(pending_req->usbif); + free_req(pending_req); +} + +static int usbbk_gnttab_map(usbif_t *usbif, + usbif_request_t *req, pending_req_t *pending_req) +{ + int i, ret; + unsigned int nr_segs; + uint32_t flags; + struct gnttab_map_grant_ref map[USBIF_MAX_SEGMENTS_PER_REQUEST]; + + nr_segs = pending_req->nr_buffer_segs + pending_req->nr_extra_segs; + + if (nr_segs > USBIF_MAX_SEGMENTS_PER_REQUEST) { + printk(KERN_ERR "Bad number of segments in request\n"); + ret = -EINVAL; + goto fail; + } + + if (nr_segs) { + pending_req->seg = kmalloc(sizeof(struct pending_req_segment) + * nr_segs, GFP_KERNEL); + if (!pending_req->seg) { + ret = -ENOMEM; + goto fail; + } + + if (pending_req->nr_buffer_segs) { + flags = GNTMAP_host_map; + if (usb_pipeout(req->pipe)) + flags |= GNTMAP_readonly; + for (i = 0; i < pending_req->nr_buffer_segs; i++) + gnttab_set_map_op(&map[i], vaddr( + pending_req, i), flags, + req->seg[i].gref, + usbif->domid); + } + + if (pending_req->nr_extra_segs) { + flags = GNTMAP_host_map; + for (i = req->nr_buffer_segs; i < nr_segs; i++) + gnttab_set_map_op(&map[i], vaddr( + pending_req, i), flags, + req->seg[i].gref, + usbif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + map, nr_segs); + BUG_ON(ret); + + for (i = 0; i < nr_segs; i++) { + if (unlikely(map[i].status != 0)) { + printk(KERN_ERR "usbback: invalid buffer -- could not remap it\n"); + map[i].handle = USBBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + set_phys_to_machine(__pa(vaddr( + pending_req, i)) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); + + pending_req->seg[i].offset = req->seg[i].offset; + pending_req->seg[i].length = req->seg[i].length; + + barrier(); + + if (pending_req->seg[i].offset >= PAGE_SIZE || + pending_req->seg[i].length > PAGE_SIZE || + pending_req->seg[i].offset + pending_req->seg[i].length > PAGE_SIZE) + ret |= 1; + } + + if (ret) + goto fail_flush; + } + + return 0; + +fail_flush: + fast_flush_area(pending_req); + ret = -ENOMEM; + +fail: + return ret; +} + +static void usbbk_init_urb(usbif_request_t *req, pending_req_t *pending_req) +{ + unsigned int pipe; + struct usb_device *udev = pending_req->stub->udev; + struct urb *urb = pending_req->urb; + + switch (usb_pipetype(req->pipe)) { + case PIPE_ISOCHRONOUS: + if (usb_pipein(req->pipe)) + pipe = usb_rcvisocpipe(udev, usb_pipeendpoint(req->pipe)); + else + pipe = usb_sndisocpipe(udev, usb_pipeendpoint(req->pipe)); + + urb->dev = udev; + urb->pipe = pipe; + urb->transfer_flags = req->transfer_flags; + urb->transfer_flags |= URB_ISO_ASAP; + urb->transfer_buffer = pending_req->buffer; + urb->transfer_buffer_length = req->buffer_length; + 
urb->complete = usbbk_urb_complete;
+		urb->context = pending_req;
+		urb->interval = req->u.isoc.interval;
+		urb->start_frame = req->u.isoc.start_frame;
+		urb->number_of_packets = req->u.isoc.number_of_packets;
+
+		break;
+	case PIPE_INTERRUPT:
+		if (usb_pipein(req->pipe))
+			pipe = usb_rcvintpipe(udev, usb_pipeendpoint(req->pipe));
+		else
+			pipe = usb_sndintpipe(udev, usb_pipeendpoint(req->pipe));
+
+		usb_fill_int_urb(urb, udev, pipe,
+				 pending_req->buffer, req->buffer_length,
+				 usbbk_urb_complete,
+				 pending_req, req->u.intr.interval);
+		/*
+		 * High-speed interrupt endpoints use a logarithmic
+		 * encoding of the endpoint interval, and
+		 * usb_fill_int_urb() initializes an interrupt urb with
+		 * that encoded interval value.
+		 *
+		 * req->u.intr.interval was already encoded on the
+		 * frontend side, so the usb_fill_int_urb() call above
+		 * leaves urb->interval doubly encoded.
+		 *
+		 * Simply overwrite urb->interval with the original
+		 * value.
+		 */
+		urb->interval = req->u.intr.interval;
+		urb->transfer_flags = req->transfer_flags;
+
+		break;
+	case PIPE_CONTROL:
+		if (usb_pipein(req->pipe))
+			pipe = usb_rcvctrlpipe(udev, 0);
+		else
+			pipe = usb_sndctrlpipe(udev, 0);
+
+		usb_fill_control_urb(urb, udev, pipe,
+				     (unsigned char *) pending_req->setup,
+				     pending_req->buffer, req->buffer_length,
+				     usbbk_urb_complete, pending_req);
+		memcpy(pending_req->setup, req->u.ctrl, 8);
+		urb->setup_dma = pending_req->setup_dma;
+		urb->transfer_flags = req->transfer_flags;
+		urb->transfer_flags |= URB_NO_SETUP_DMA_MAP;
+
+		break;
+	case PIPE_BULK:
+		if (usb_pipein(req->pipe))
+			pipe = usb_rcvbulkpipe(udev, usb_pipeendpoint(req->pipe));
+		else
+			pipe = usb_sndbulkpipe(udev, usb_pipeendpoint(req->pipe));
+
+		usb_fill_bulk_urb(urb, udev, pipe,
+				  pending_req->buffer, req->buffer_length,
+				  usbbk_urb_complete, pending_req);
+		urb->transfer_flags = req->transfer_flags;
+
+		break;
+	default:
+		break;
+	}
+
+	if (req->buffer_length) {
+		urb->transfer_dma = pending_req->transfer_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	}
+}
+
+struct set_interface_request {
+	pending_req_t *pending_req;
+	int interface;
+	int alternate;
+	struct work_struct work;
+};
+
+static void usbbk_set_interface_work(struct work_struct *arg)
+{
+	struct set_interface_request *req
+		= container_of(arg, struct set_interface_request, work);
+	pending_req_t *pending_req = req->pending_req;
+	struct usb_device *udev = req->pending_req->stub->udev;
+	int ret;
+
+	usb_lock_device(udev);
+	ret = usb_set_interface(udev, req->interface, req->alternate);
+	usb_unlock_device(udev);
+	usb_put_dev(udev);
+
+	usbbk_do_response(pending_req, ret, 0, 0, 0);
+	usbif_put(pending_req->usbif);
+	free_req(pending_req);
+	kfree(req);
+}
+
+static int usbbk_set_interface(pending_req_t *pending_req, int interface, int alternate)
+{
+	struct set_interface_request *req;
+	struct usb_device *udev = pending_req->stub->udev;
+
+	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+	req->pending_req = pending_req;
+	req->interface = interface;
+	req->alternate = alternate;
+	INIT_WORK(&req->work, usbbk_set_interface_work);
+	usb_get_dev(udev);
+	schedule_work(&req->work);
+	return 0;
+}
+
+struct clear_halt_request {
+	pending_req_t *pending_req;
+	int pipe;
+	struct work_struct work;
+};
+
+static void usbbk_clear_halt_work(struct work_struct *arg)
+{
+	struct clear_halt_request *req
+		= container_of(arg, struct clear_halt_request, work);
+	pending_req_t *pending_req = req->pending_req;
+	struct usb_device *udev = req->pending_req->stub->udev;
+	int ret;
+
+	usb_lock_device(udev);
+	ret = usb_clear_halt(udev, req->pipe);
+	usb_unlock_device(udev);
+	usb_put_dev(udev);
+
+	usbbk_do_response(pending_req, ret, 0, 0, 0);
+	usbif_put(pending_req->usbif);
+	free_req(pending_req);
+	kfree(req);
+}
+
+static int usbbk_clear_halt(pending_req_t *pending_req, int pipe)
+{
+	struct clear_halt_request *req;
+	struct usb_device *udev = pending_req->stub->udev;
+
+	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+	req->pending_req = pending_req;
+	req->pipe = pipe;
+	INIT_WORK(&req->work, usbbk_clear_halt_work);
+
+	usb_get_dev(udev);
+	schedule_work(&req->work);
+	return 0;
+}
+
+#if 0
+struct port_reset_request {
+	pending_req_t *pending_req;
+	struct work_struct work;
+};
+
+static void usbbk_port_reset_work(struct work_struct *arg)
+{
+	struct port_reset_request *req
+		= container_of(arg, struct port_reset_request, work);
+	pending_req_t *pending_req = req->pending_req;
+	struct usb_device *udev = pending_req->stub->udev;
+	int ret, ret_lock;
+
+	ret = ret_lock = usb_lock_device_for_reset(udev, NULL);
+	if (ret_lock >= 0) {
+		ret = usb_reset_device(udev);
+		if (ret_lock)
+			usb_unlock_device(udev);
+	}
+	usb_put_dev(udev);
+
+	usbbk_do_response(pending_req, ret, 0, 0, 0);
+	usbif_put(pending_req->usbif);
+	free_req(pending_req);
+	kfree(req);
+}
+
+static int usbbk_port_reset(pending_req_t *pending_req)
+{
+	struct port_reset_request *req;
+	struct usb_device *udev = pending_req->stub->udev;
+
+	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	req->pending_req = pending_req;
+	INIT_WORK(&req->work, usbbk_port_reset_work);
+
+	usb_get_dev(udev);
+	schedule_work(&req->work);
+	return 0;
+}
+#endif
+
+static void usbbk_set_address(usbif_t *usbif, struct usbstub *stub, int cur_addr, int new_addr)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&usbif->addr_lock, flags);
+	if (cur_addr)
+		usbif->addr_table[cur_addr] = NULL;
+	if (new_addr)
+		usbif->addr_table[new_addr] = stub;
+	stub->addr = new_addr;
+	spin_unlock_irqrestore(&usbif->addr_lock, flags);
+}
+
+struct usbstub *find_attached_device(usbif_t *usbif, int portnum)
+{
+	struct usbstub *stub;
+	int found = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&usbif->plug_lock, flags);
+	list_for_each_entry(stub, &usbif->plugged_devices, plugged_list) {
+		if (stub->id->portnum == portnum) {
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&usbif->plug_lock, flags);
+
+	if (found)
+		return stub;
+
+	return NULL;
+}
+
+static int check_and_submit_special_ctrlreq(usbif_t *usbif, usbif_request_t *req, pending_req_t *pending_req)
+{
+	int devnum;
+	struct usbstub *stub = NULL;
+	struct usb_ctrlrequest *ctrl = (struct usb_ctrlrequest *) req->u.ctrl;
+	int ret;
+	int done = 0;
+
+	devnum = usb_pipedevice(req->pipe);
+
+	/*
+	 * When a device is first connected or reset, it has no address.
+	 * In this initial state, the following requests are sent to
+	 * device address 0:
+	 *
+	 * 1. GET_DESCRIPTOR (with descriptor type "DEVICE") is sent,
+	 * so the OS learns what kind of device is connected.
+	 *
+	 * 2. SET_ADDRESS is sent, after which the device has its
+	 * address.
+	 *
+	 * In the next step, SET_CONFIGURATION is sent to the addressed
+	 * device, and then the device is finally ready to use.
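For reference, the SET_ADDRESS setup packet intercepted by this path has the standard USB 2.0 chapter 9 layout; a sketch with a hypothetical address (not part of the patch):

/* Sketch: a standard SET_ADDRESS setup packet; new_addr is a
 * hypothetical value chosen for illustration. */
__u16 new_addr = 5;
struct usb_ctrlrequest setup = {
	.bRequestType = USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
	.bRequest     = USB_REQ_SET_ADDRESS,
	.wValue       = cpu_to_le16(new_addr),	/* the address to assign */
	.wIndex       = 0,
	.wLength      = 0,
};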
+ */ + if (unlikely(devnum == 0)) { + stub = find_attached_device(usbif, usbif_pipeportnum(req->pipe)); + if (unlikely(!stub)) { + ret = -ENODEV; + goto fail_response; + } + + switch (ctrl->bRequest) { + case USB_REQ_GET_DESCRIPTOR: + /* + * GET_DESCRIPTOR request to device #0. + * through to normal urb transfer. + */ + pending_req->stub = stub; + return 0; + break; + case USB_REQ_SET_ADDRESS: + /* + * SET_ADDRESS request to device #0. + * add attached device to addr_table. + */ + { + __u16 addr = le16_to_cpu(ctrl->wValue); + usbbk_set_address(usbif, stub, 0, addr); + } + ret = 0; + goto fail_response; + break; + default: + ret = -EINVAL; + goto fail_response; + } + } else { + if (unlikely(!usbif->addr_table[devnum])) { + ret = -ENODEV; + goto fail_response; + } + pending_req->stub = usbif->addr_table[devnum]; + } + + /* + * Check special request + */ + switch (ctrl->bRequest) { + case USB_REQ_SET_ADDRESS: + /* + * SET_ADDRESS request to addressed device. + * change addr or remove from addr_table. + */ + { + __u16 addr = le16_to_cpu(ctrl->wValue); + usbbk_set_address(usbif, stub, devnum, addr); + } + ret = 0; + goto fail_response; + break; +#if 0 + case USB_REQ_SET_CONFIGURATION: + /* + * linux 2.6.27 or later version only! + */ + if (ctrl->RequestType == USB_RECIP_DEVICE) { + __u16 config = le16_to_cpu(ctrl->wValue); + usb_driver_set_configuration(pending_req->stub->udev, config); + done = 1; + } + break; +#endif + case USB_REQ_SET_INTERFACE: + if (ctrl->bRequestType == USB_RECIP_INTERFACE) { + __u16 alt = le16_to_cpu(ctrl->wValue); + __u16 intf = le16_to_cpu(ctrl->wIndex); + usbbk_set_interface(pending_req, intf, alt); + done = 1; + } + break; + case USB_REQ_CLEAR_FEATURE: + if (ctrl->bRequestType == USB_RECIP_ENDPOINT + && ctrl->wValue == USB_ENDPOINT_HALT) { + int pipe; + int ep = le16_to_cpu(ctrl->wIndex) & 0x0f; + int dir = le16_to_cpu(ctrl->wIndex) + & USB_DIR_IN; + if (dir) + pipe = usb_rcvctrlpipe(pending_req->stub->udev, ep); + else + pipe = usb_sndctrlpipe(pending_req->stub->udev, ep); + usbbk_clear_halt(pending_req, pipe); + done = 1; + } + break; +#if 0 /* not tested yet */ + case USB_REQ_SET_FEATURE: + if (ctrl->bRequestType == USB_RT_PORT) { + __u16 feat = le16_to_cpu(ctrl->wValue); + if (feat == USB_PORT_FEAT_RESET) { + usbbk_port_reset(pending_req); + done = 1; + } + } + break; +#endif + default: + break; + } + + return done; + +fail_response: + usbbk_do_response(pending_req, ret, 0, 0, 0); + usbif_put(usbif); + free_req(pending_req); + return 1; +} + +static void dispatch_request_to_pending_reqs(usbif_t *usbif, + usbif_request_t *req, + pending_req_t *pending_req) +{ + int ret; + + pending_req->id = req->id; + pending_req->usbif = usbif; + + barrier(); + + /* + * TODO: + * receive unlink request and cancel the urb in backend + */ +#if 0 + if (unlikely(usb_pipeunlink(req->pipe))) { + + } +#endif + + usbif_get(usbif); + + if (usb_pipecontrol(req->pipe)) { + if (check_and_submit_special_ctrlreq(usbif, req, pending_req)) + return; + } else { + int devnum = usb_pipedevice(req->pipe); + if (unlikely(!usbif->addr_table[devnum])) { + ret = -ENODEV; + goto fail_response; + } + pending_req->stub = usbif->addr_table[devnum]; + } + + barrier(); + + ret = usbbk_alloc_urb(req, pending_req); + if (ret) { + ret = -ESHUTDOWN; + goto fail_response; + } + + add_req_to_submitting_list(pending_req->stub, pending_req); + + barrier(); + + usbbk_init_urb(req, pending_req); + + barrier(); + + pending_req->nr_buffer_segs = req->nr_buffer_segs; + if (usb_pipeisoc(req->pipe)) + 
pending_req->nr_extra_segs = req->u.isoc.nr_frame_desc_segs; + else + pending_req->nr_extra_segs = 0; + + barrier(); + + ret = usbbk_gnttab_map(usbif, req, pending_req); + if (ret) { + printk(KERN_ERR "usbback: invalid buffer\n"); + ret = -ESHUTDOWN; + goto fail_free_urb; + } + + barrier(); + + if (usb_pipeout(req->pipe) && req->buffer_length) + copy_pages_to_buff(pending_req->buffer, + pending_req, + 0, + pending_req->nr_buffer_segs); + if (usb_pipeisoc(req->pipe)) { + copy_pages_to_buff(&pending_req->urb->iso_frame_desc[0], + pending_req, + pending_req->nr_buffer_segs, + pending_req->nr_extra_segs); + } + + barrier(); + + ret = usb_submit_urb(pending_req->urb, GFP_KERNEL); + if (ret) { + printk(KERN_ERR "usbback: failed submitting urb, error %d\n", ret); + ret = -ESHUTDOWN; + goto fail_flush_area; + } + return; + +fail_flush_area: + fast_flush_area(pending_req); +fail_free_urb: + remove_req_from_submitting_list(pending_req->stub, pending_req); + barrier(); + usbbk_free_urb(pending_req->urb); +fail_response: + usbbk_do_response(pending_req, ret, 0, 0, 0); + usbif_put(usbif); + free_req(pending_req); +} + +static int usbbk_start_submit_urb(usbif_t *usbif) +{ + usbif_back_ring_t *usb_ring = &usbif->ring; + usbif_request_t *ring_req; + pending_req_t *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + + rc = usb_ring->req_cons; + rp = usb_ring->sring->req_prod; + rmb(); + + while (rc != rp) { + if (RING_REQUEST_CONS_OVERFLOW(usb_ring, rc)) { + printk(KERN_WARNING "RING_REQUEST_CONS_OVERFLOW\n"); + break; + } + + pending_req = alloc_req(); + if (NULL == pending_req) { + more_to_do = 1; + break; + } + + ring_req = RING_GET_REQUEST(usb_ring, rc); + usb_ring->req_cons = ++rc; + + dispatch_request_to_pending_reqs(usbif, ring_req, + pending_req); + } + + RING_FINAL_CHECK_FOR_REQUESTS(&usbif->ring, more_to_do); + + cond_resched(); + + return more_to_do; +} + +int usbbk_schedule(void *arg) +{ + usbif_t *usbif = (usbif_t *)arg; + + usbif_get(usbif); + + while(!kthread_should_stop()) { + wait_event_interruptible( + usbif->wq, + usbif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + usbif->waiting_reqs = 0; + smp_mb(); + + if (usbbk_start_submit_urb(usbif)) + usbif->waiting_reqs = 1; + } + + usbif->xenusbd = NULL; + usbif_put(usbif); + + return 0; +} + +/* + * attach the grabbed device to usbif. + */ +void usbbk_plug_device(usbif_t *usbif, struct usbstub *stub) +{ + unsigned long flags; + + spin_lock_irqsave(&usbif->plug_lock, flags); + list_add(&stub->plugged_list, &usbif->plugged_devices); + spin_unlock_irqrestore(&usbif->plug_lock, flags); + stub->plugged = 1; + stub->usbif = usbif; +} + +/* + * detach the grabbed device from usbif. 
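+ *
+ * The address-table entry (if any) is cleared first so that no new
+ * requests can be routed to the device, and only then is the stub
+ * removed from the plugged list under plug_lock.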
+ */ +void usbbk_unplug_device(usbif_t *usbif, struct usbstub *stub) +{ + unsigned long flags; + + if (stub->addr) + usbbk_set_address(usbif, stub, stub->addr, 0); + spin_lock_irqsave(&usbif->plug_lock, flags); + list_del(&stub->plugged_list); + spin_unlock_irqrestore(&usbif->plug_lock, flags); + stub->plugged = 0; + stub->usbif = NULL; +} + +void detach_device_without_lock(usbif_t *usbif, struct usbstub *stub) +{ + if (stub->addr) + usbbk_set_address(usbif, stub, stub->addr, 0); + list_del(&stub->plugged_list); + stub->plugged = 0; + stub->usbif = NULL; +} + +static int __init usbback_init(void) +{ + int i, rc, mmap_pages; + + if (!is_running_on_xen()) + return -ENODEV; + + if (usbstub_init()) + return -ENODEV; + + mmap_pages = usbif_reqs * USBIF_MAX_SEGMENTS_PER_REQUEST; + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + usbif_reqs, GFP_KERNEL); + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + + if (!pending_reqs || !pending_grant_handles || !pending_pages) + goto out_of_memory; + + for (i = 0; i < mmap_pages; i++) + pending_grant_handles[i] = USBBACK_INVALID_HANDLE; + + memset(pending_reqs, 0, sizeof(pending_reqs)); + INIT_LIST_HEAD(&pending_free); + + for (i = 0; i < usbif_reqs; i++) { + list_add_tail(&pending_reqs[i].free_list, &pending_free); + } + + rc = usbback_xenbus_init(); + if (rc) + goto fail; + + return 0; + + out_of_memory: + printk("%s: out of memory\n", __FUNCTION__); + rc = -ENOMEM; + fail: + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, mmap_pages); + usbstub_exit(); + return rc; +} + +static void __exit usbback_exit(void) +{ + usbback_xenbus_exit(); + usbstub_exit(); + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, usbif_reqs * USBIF_MAX_SEGMENTS_PER_REQUEST); +} + +module_init(usbback_init); +module_exit(usbback_exit); + +MODULE_AUTHOR(""); +MODULE_DESCRIPTION("Xen USB backend driver (usbback)"); +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/usbback/Makefile +++ linux-ec2-2.6.31/drivers/xen/usbback/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_XEN_USB_BACKEND) := usbbk.o + +usbbk-y := usbstub.o xenbus.o interface.o usbback.o + --- linux-ec2-2.6.31.orig/drivers/xen/usbback/usbback.h +++ linux-ec2-2.6.31/drivers/xen/usbback/usbback.h @@ -0,0 +1,164 @@ +/* + * usbback.h + * + * This file is part of Xen USB backend driver. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ * + * or, by your choice, + * + * When distributed separately from the Linux kernel or incorporated into + * other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_USBBACK_H__ +#define __XEN_USBBACK_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct usbstub; + +#ifndef BUS_ID_SIZE +#define USBBACK_BUS_ID_SIZE 20 +#else +#define USBBACK_BUS_ID_SIZE BUS_ID_SIZE +#endif + +#define USB_DEV_ADDR_SIZE 128 + +typedef struct usbif_st { + domid_t domid; + unsigned int handle; + struct xenbus_device *xbdev; + struct list_head usbif_list; + + unsigned int irq; + + usbif_back_ring_t ring; + struct vm_struct *ring_area; + + spinlock_t ring_lock; + atomic_t refcnt; + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; + + /* device address lookup table */ + spinlock_t addr_lock; + struct usbstub *addr_table[USB_DEV_ADDR_SIZE]; + + /* plugged device list */ + unsigned plaggable:1; + spinlock_t plug_lock; + struct list_head plugged_devices; + + /* request schedule */ + struct task_struct *xenusbd; + unsigned int waiting_reqs; + wait_queue_head_t waiting_to_free; + wait_queue_head_t wq; + +} usbif_t; + +struct usbstub_id +{ + struct list_head id_list; + + char bus_id[USBBACK_BUS_ID_SIZE]; + int dom_id; + int dev_id; + int portnum; +}; + +struct usbstub +{ + struct usbstub_id *id; + struct usb_device *udev; + struct usb_interface *interface; + usbif_t *usbif; + + struct list_head grabbed_list; + + unsigned plugged:1; + struct list_head plugged_list; + + int addr; + + spinlock_t submitting_lock; + struct list_head submitting_list; +}; + +usbif_t *usbif_alloc(domid_t domid, unsigned int handle); +void usbif_disconnect(usbif_t *usbif); +void usbif_free(usbif_t *usbif); +int usbif_map(usbif_t *usbif, unsigned long shared_page, unsigned int evtchn); + +#define usbif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define usbif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free); \ + } while (0) + +int usbback_xenbus_init(void); +void usbback_xenbus_exit(void); + +irqreturn_t usbbk_be_int(int irq, void *dev_id); +int usbbk_schedule(void *arg); +struct usbstub *find_attached_device(usbif_t *usbif, int port); +struct usbstub *find_grabbed_device(int dom_id, int dev_id, int port); +usbif_t *find_usbif(int dom_id, int dev_id); +void usbback_reconfigure(usbif_t *usbif); +void 
usbbk_plug_device(usbif_t *usbif, struct usbstub *stub); +void usbbk_unplug_device(usbif_t *usbif, struct usbstub *stub); +void detach_device_without_lock(usbif_t *usbif, struct usbstub *stub); +void usbbk_unlink_urbs(struct usbstub *stub); + +int usbstub_init(void); +void usbstub_exit(void); + +#endif /* __XEN_USBBACK_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/scsiback/interface.c +++ linux-ec2-2.6.31/drivers/xen/scsiback/interface.c @@ -0,0 +1,182 @@ +/* + * interface management. + * + * Copyright (c) 2008, FUJITSU Limited + * + * Based on the blkback driver code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include "common.h"
+
+#include
+#include
+
+
+static struct kmem_cache *scsiback_cachep;
+
+struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
+{
+	struct vscsibk_info *info;
+
+	info = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	memset(info, 0, sizeof(*info));
+	info->domid = domid;
+	spin_lock_init(&info->ring_lock);
+	atomic_set(&info->nr_unreplied_reqs, 0);
+	init_waitqueue_head(&info->wq);
+	init_waitqueue_head(&info->waiting_to_free);
+
+	return info;
+}
+
+static int map_frontend_page(struct vscsibk_info *info,
+				unsigned long ring_ref)
+{
+	struct gnttab_map_grant_ref op;
+	int err;
+
+	gnttab_set_map_op(&op, (unsigned long)info->ring_area->addr,
+				GNTMAP_host_map, ring_ref,
+				info->domid);
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+	BUG_ON(err);
+
+	if (op.status) {
+		printk(KERN_ERR "scsiback: Grant table operation failure!\n");
+		return op.status;
+	}
+
+	info->shmem_ref = ring_ref;
+	info->shmem_handle = op.handle;
+
+	return (GNTST_okay);
+}
+
+static void unmap_frontend_page(struct vscsibk_info *info)
+{
+	struct gnttab_unmap_grant_ref op;
+	int err;
+
+	gnttab_set_unmap_op(&op, (unsigned long)info->ring_area->addr,
+				GNTMAP_host_map, info->shmem_handle);
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+	BUG_ON(err);
+
+}
+
+int scsiback_init_sring(struct vscsibk_info *info,
+		unsigned long ring_ref, unsigned int evtchn)
+{
+	struct vscsiif_sring *sring;
+	int err;
+
+	if (info->irq) {
+		printk(KERN_ERR "scsiback: Already connected?\n");
+		return -1;
+	}
+
+	info->ring_area = alloc_vm_area(PAGE_SIZE);
+	if (!info->ring_area)
+		return -ENOMEM;
+
+	err = map_frontend_page(info, ring_ref);
+	if (err)
+		goto free_vm;
+
+	sring = (struct vscsiif_sring *) info->ring_area->addr;
+	BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+	err = bind_interdomain_evtchn_to_irqhandler(
+			info->domid, evtchn,
+			scsiback_intr, 0, "vscsiif-backend", info);
+
+	if (err < 0)
+		goto unmap_page;
+
+	info->irq = err;
+
+	return 0;
+
+unmap_page:
+	unmap_frontend_page(info);
+free_vm:
+	free_vm_area(info->ring_area);
+
+	return err;
+}
+
+void scsiback_disconnect(struct vscsibk_info *info)
+{
+	if (info->kthread) {
+		kthread_stop(info->kthread);
+		info->kthread = NULL;
+	}
+
+	wait_event(info->waiting_to_free,
+		atomic_read(&info->nr_unreplied_reqs) == 0);
+
+	if (info->irq) {
+		unbind_from_irqhandler(info->irq, info);
+		info->irq = 0;
+	}
+
+	if (info->ring.sring) {
+		unmap_frontend_page(info);
+		free_vm_area(info->ring_area);
+		info->ring.sring = NULL;
+	}
+}
+
+void scsiback_free(struct vscsibk_info *info)
+{
+	kmem_cache_free(scsiback_cachep, info);
+}
+
+int __init scsiback_interface_init(void)
+{
+	scsiback_cachep = kmem_cache_create("vscsiif_cache",
+		sizeof(struct vscsibk_info), 0, 0, NULL);
+	if (!scsiback_cachep) {
+		printk(KERN_ERR "scsiback: can't init scsi cache\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void scsiback_interface_exit(void)
+{
+	kmem_cache_destroy(scsiback_cachep);
+} --- linux-ec2-2.6.31.orig/drivers/xen/scsiback/scsiback.c +++ linux-ec2-2.6.31/drivers/xen/scsiback/scsiback.c @@ -0,0 +1,725 @@
+/*
+ * Xen SCSI backend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * Based on the blkback driver code.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + + +struct list_head pending_free; +DEFINE_SPINLOCK(pending_free_lock); +DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); + +int vscsiif_reqs = VSCSIIF_BACK_MAX_PENDING_REQS; +module_param_named(reqs, vscsiif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate"); + +static unsigned int log_print_stat = 0; +module_param(log_print_stat, int, 0644); + +#define SCSIBACK_INVALID_HANDLE (~0) + +static pending_req_t *pending_reqs; +static struct page **pending_pages; +static grant_handle_t *pending_grant_handles; + +static int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg; +} + +static unsigned long vaddr(pending_req_t *req, int seg) +{ + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + + +void scsiback_fast_flush_area(pending_req_t *req) +{ + struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int err; + + if (req->nr_segments) { + for (i = 0; i < req->nr_segments; i++) { + handle = pending_handle(req, i); + if (handle == SCSIBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[i], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = SCSIBACK_INVALID_HANDLE; + invcount++; + } + + err = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(err); + kfree(req->sgl); + } + + return; +} + + +static pending_req_t * alloc_req(struct vscsibk_info *info) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&pending_free_lock, flags); + return req; +} + + +static void free_req(pending_req_t *req) +{ + unsigned long flags; + 
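+	/*
+	 * Remember whether the free list was empty before this request
+	 * is returned: a waiter in the scheduler thread only needs a
+	 * wake-up on the empty -> non-empty transition.
+	 */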
int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + spin_unlock_irqrestore(&pending_free_lock, flags); + if (was_empty) + wake_up(&pending_free_wq); +} + + +static void scsiback_notify_work(struct vscsibk_info *info) +{ + info->waiting_reqs = 1; + wake_up(&info->wq); +} + +void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result, + uint32_t resid, pending_req_t *pending_req) +{ + vscsiif_response_t *ring_res; + struct vscsibk_info *info = pending_req->info; + int notify; + int more_to_do = 1; + struct scsi_sense_hdr sshdr; + unsigned long flags; + + DPRINTK("%s\n",__FUNCTION__); + + spin_lock_irqsave(&info->ring_lock, flags); + + ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt); + info->ring.rsp_prod_pvt++; + + ring_res->rslt = result; + ring_res->rqid = pending_req->rqid; + + if (sense_buffer != NULL) { + if (scsi_normalize_sense(sense_buffer, + sizeof(sense_buffer), &sshdr)) { + + int len = 8 + sense_buffer[7]; + + if (len > VSCSIIF_SENSE_BUFFERSIZE) + len = VSCSIIF_SENSE_BUFFERSIZE; + + memcpy(ring_res->sense_buffer, sense_buffer, len); + ring_res->sense_len = len; + } + } else { + ring_res->sense_len = 0; + } + + ring_res->residual_len = resid; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify); + if (info->ring.rsp_prod_pvt == info->ring.req_cons) { + RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do); + } else if (RING_HAS_UNCONSUMED_REQUESTS(&info->ring)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&info->ring_lock, flags); + + if (more_to_do) + scsiback_notify_work(info); + + if (notify) + notify_remote_via_irq(info->irq); + + free_req(pending_req); +} + +static void scsiback_print_status(char *sense_buffer, int errors, + pending_req_t *pending_req) +{ + struct scsi_device *sdev = pending_req->sdev; + + printk(KERN_ERR "scsiback: %d:%d:%d:%d ",sdev->host->host_no, + sdev->channel, sdev->id, sdev->lun); + printk(KERN_ERR "status = 0x%02x, message = 0x%02x, host = 0x%02x, driver = 0x%02x\n", + status_byte(errors), msg_byte(errors), + host_byte(errors), driver_byte(errors)); + + printk(KERN_ERR "scsiback: cmnd[0]=0x%02X\n", + pending_req->cmnd[0]); + + if (CHECK_CONDITION & status_byte(errors)) + __scsi_print_sense("scsiback", sense_buffer, SCSI_SENSE_BUFFERSIZE); +} + + +static void scsiback_cmd_done(struct request *req, int uptodate) +{ + pending_req_t *pending_req = req->end_io_data; + unsigned char *sense_buffer; + unsigned int resid; + int errors; + + sense_buffer = req->sense; + resid = blk_rq_bytes(req); + errors = req->errors; + + if (errors != 0) { + if (log_print_stat) + scsiback_print_status(sense_buffer, errors, pending_req); + } + + /* The Host mode is through as for Emulation. 
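+	 * That is: unless the backend is in pass-through (VSCSI_TYPE_HOST)
+	 * mode, the completed request is run through the response-emulation
+	 * hook before the result is returned to the frontend.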
*/ + if (pending_req->info->feature != VSCSI_TYPE_HOST) + scsiback_rsp_emulation(pending_req); + + scsiback_fast_flush_area(pending_req); + scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req); + scsiback_put(pending_req->info); + + __blk_put_request(req->q, req); +} + + +static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req, + pending_req_t *pending_req) +{ + u32 flags; + int write; + int i, err = 0; + unsigned int data_len = 0; + struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE]; + struct vscsibk_info *info = pending_req->info; + + int data_dir = (int)pending_req->sc_data_direction; + unsigned int nr_segments = (unsigned int)pending_req->nr_segments; + + write = (data_dir == DMA_TO_DEVICE); + + if (nr_segments) { + struct scatterlist *sg; + + /* free of (sgl) in fast_flush_area()*/ + pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments, + GFP_KERNEL); + if (!pending_req->sgl) { + printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__); + return -ENOMEM; + } + + sg_init_table(pending_req->sgl, nr_segments); + + for (i = 0; i < nr_segments; i++) { + flags = GNTMAP_host_map; + if (write) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + ring_req->seg[i].gref, + info->domid); + } + + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments); + BUG_ON(err); + + for_each_sg (pending_req->sgl, sg, nr_segments, i) { + if (unlikely(map[i].status != 0)) { + printk(KERN_ERR "scsiback: invalid buffer -- could not remap it\n"); + map[i].handle = SCSIBACK_INVALID_HANDLE; + err |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (err) + continue; + + set_phys_to_machine(__pa(vaddr( + pending_req, i)) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); + + sg_set_page(sg, virt_to_page(vaddr(pending_req, i)), + ring_req->seg[i].length, + ring_req->seg[i].offset); + data_len += sg->length; + + barrier(); + if (sg->offset >= PAGE_SIZE || + sg->length > PAGE_SIZE || + sg->offset + sg->length > PAGE_SIZE) + err |= 1; + + } + + if (err) + goto fail_flush; + } + + pending_req->request_bufflen = data_len; + + return 0; + +fail_flush: + scsiback_fast_flush_area(pending_req); + return -ENOMEM; +} + +/* quoted scsi_lib.c/scsi_bi_endio */ +static void scsiback_bi_endio(struct bio *bio, int error) +{ + bio_put(bio); +} + + + +/* quoted scsi_lib.c/scsi_req_map_sg . 
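+ *
+ * Builds a chain of bios from the grant-mapped scatterlist so that the
+ * command can be pushed through the block layer.  A rough, illustrative
+ * example of the splitting arithmetic: a 6144-byte segment at offset 512
+ * covers two 4096-byte pages, so it is added as two bio_add_pc_page()
+ * calls of 3584 and 2560 bytes.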
*/ +static struct bio *request_map_sg(pending_req_t *pending_req) +{ + struct request_queue *q = pending_req->sdev->request_queue; + unsigned int nsegs = (unsigned int)pending_req->nr_segments; + unsigned int i, len, bytes, off, nr_pages, nr_vecs = 0; + struct scatterlist *sg; + struct page *page; + struct bio *bio = NULL, *bio_first = NULL, *bio_last = NULL; + int err; + + for_each_sg (pending_req->sgl, sg, nsegs, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; + + nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT; + while (len > 0) { + bytes = min_t(unsigned int, len, PAGE_SIZE - off); + + if (!bio) { + nr_vecs = min_t(unsigned int, BIO_MAX_PAGES, + nr_pages); + nr_pages -= nr_vecs; + bio = bio_alloc(GFP_KERNEL, nr_vecs); + if (!bio) { + err = -ENOMEM; + goto free_bios; + } + bio->bi_end_io = scsiback_bi_endio; + if (bio_last) + bio_last->bi_next = bio; + else + bio_first = bio; + bio_last = bio; + } + + if (bio_add_pc_page(q, bio, page, bytes, off) != + bytes) { + bio_put(bio); + err = -EINVAL; + goto free_bios; + } + + if (bio->bi_vcnt >= nr_vecs) { + bio->bi_flags &= ~(1 << BIO_SEG_VALID); + if (pending_req->sc_data_direction == WRITE) + bio->bi_rw |= (1 << BIO_RW); + bio = NULL; + } + + page++; + len -= bytes; + off = 0; + } + } + + return bio_first; + +free_bios: + while ((bio = bio_first) != NULL) { + bio_first = bio->bi_next; + bio_put(bio); + } + + return ERR_PTR(err); +} + + +void scsiback_cmd_exec(pending_req_t *pending_req) +{ + int cmd_len = (int)pending_req->cmd_len; + int data_dir = (int)pending_req->sc_data_direction; + unsigned int timeout; + struct request *rq; + int write; + + DPRINTK("%s\n",__FUNCTION__); + + /* because it doesn't timeout backend earlier than frontend.*/ + if (pending_req->timeout_per_command) + timeout = pending_req->timeout_per_command * HZ; + else + timeout = VSCSIIF_TIMEOUT; + + write = (data_dir == DMA_TO_DEVICE); + if (pending_req->nr_segments) { + struct bio *bio = request_map_sg(pending_req); + + if (IS_ERR(bio)) { + printk(KERN_ERR "scsiback: SG Request Map Error\n"); + return; + } + + rq = blk_make_request(pending_req->sdev->request_queue, bio, + GFP_KERNEL); + if (IS_ERR(rq)) { + printk(KERN_ERR "scsiback: Make Request Error\n"); + return; + } + + rq->buffer = NULL; + } else { + rq = blk_get_request(pending_req->sdev->request_queue, write, + GFP_KERNEL); + if (unlikely(!rq)) { + printk(KERN_ERR "scsiback: Get Request Error\n"); + return; + } + } + + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_len = cmd_len; + memcpy(rq->cmd, pending_req->cmnd, cmd_len); + + memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE); + rq->sense = pending_req->sense_buffer; + rq->sense_len = 0; + + /* not allowed to retry in backend. 
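+	 * Error handling and retries are left to the frontend's SCSI
+	 * midlayer, so the command is issued exactly once with the
+	 * frontend-supplied timeout (VSCSIIF_TIMEOUT as a fallback).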
*/ + rq->retries = 0; + rq->timeout = timeout; + rq->end_io_data = pending_req; + + scsiback_get(pending_req->info); + blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done); + + return ; +} + + +static void scsiback_device_reset_exec(pending_req_t *pending_req) +{ + struct vscsibk_info *info = pending_req->info; + int err; + struct scsi_device *sdev = pending_req->sdev; + + scsiback_get(info); + err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE); + + scsiback_do_resp_with_sense(NULL, err, 0, pending_req); + scsiback_put(info); + + return; +} + + +irqreturn_t scsiback_intr(int irq, void *dev_id) +{ + scsiback_notify_work((struct vscsibk_info *)dev_id); + return IRQ_HANDLED; +} + +static int prepare_pending_reqs(struct vscsibk_info *info, + vscsiif_request_t *ring_req, pending_req_t *pending_req) +{ + struct scsi_device *sdev; + struct ids_tuple vir; + int err = -EINVAL; + + DPRINTK("%s\n",__FUNCTION__); + + pending_req->rqid = ring_req->rqid; + pending_req->act = ring_req->act; + + pending_req->info = info; + + pending_req->v_chn = vir.chn = ring_req->channel; + pending_req->v_tgt = vir.tgt = ring_req->id; + vir.lun = ring_req->lun; + + rmb(); + sdev = scsiback_do_translation(info, &vir); + if (!sdev) { + pending_req->sdev = NULL; + DPRINTK("scsiback: doesn't exist.\n"); + err = -ENODEV; + goto invalid_value; + } + pending_req->sdev = sdev; + + /* request range check from frontend */ + pending_req->sc_data_direction = ring_req->sc_data_direction; + barrier(); + if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) && + (pending_req->sc_data_direction != DMA_TO_DEVICE) && + (pending_req->sc_data_direction != DMA_FROM_DEVICE) && + (pending_req->sc_data_direction != DMA_NONE)) { + DPRINTK("scsiback: invalid parameter data_dir = %d\n", + pending_req->sc_data_direction); + err = -EINVAL; + goto invalid_value; + } + + pending_req->nr_segments = ring_req->nr_segments; + barrier(); + if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) { + DPRINTK("scsiback: invalid parameter nr_seg = %d\n", + pending_req->nr_segments); + err = -EINVAL; + goto invalid_value; + } + + pending_req->cmd_len = ring_req->cmd_len; + barrier(); + if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) { + DPRINTK("scsiback: invalid parameter cmd_len = %d\n", + pending_req->cmd_len); + err = -EINVAL; + goto invalid_value; + } + memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len); + + pending_req->timeout_per_command = ring_req->timeout_per_command; + + if(scsiback_gnttab_data_map(ring_req, pending_req)) { + DPRINTK("scsiback: invalid buffer\n"); + err = -EINVAL; + goto invalid_value; + } + + return 0; + +invalid_value: + return err; +} + + +static int scsiback_do_cmd_fn(struct vscsibk_info *info) +{ + struct vscsiif_back_ring *ring = &info->ring; + vscsiif_request_t *ring_req; + + pending_req_t *pending_req; + RING_IDX rc, rp; + int err, more_to_do = 0; + + DPRINTK("%s\n",__FUNCTION__); + + rc = ring->req_cons; + rp = ring->sring->req_prod; + rmb(); + + while ((rc != rp)) { + if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) + break; + pending_req = alloc_req(info); + if (NULL == pending_req) { + more_to_do = 1; + break; + } + + ring_req = RING_GET_REQUEST(ring, rc); + ring->req_cons = ++rc; + + err = prepare_pending_reqs(info, ring_req, + pending_req); + if (err == -EINVAL) { + scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24), + 0, pending_req); + continue; + } else if (err == -ENODEV) { + scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16), + 0, pending_req); + continue; + } + + if 
(pending_req->act == VSCSIIF_ACT_SCSI_CDB) { + + /* The Host mode is through as for Emulation. */ + if (info->feature == VSCSI_TYPE_HOST) + scsiback_cmd_exec(pending_req); + else + scsiback_req_emulation_or_cmdexec(pending_req); + + } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) { + scsiback_device_reset_exec(pending_req); + } else { + printk(KERN_ERR "scsiback: invalid parameter for request\n"); + scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24), + 0, pending_req); + continue; + } + } + + if (RING_HAS_UNCONSUMED_REQUESTS(ring)) + more_to_do = 1; + + /* Yield point for this unbounded loop. */ + cond_resched(); + + return more_to_do; +} + + +int scsiback_schedule(void *data) +{ + struct vscsibk_info *info = (struct vscsibk_info *)data; + + DPRINTK("%s\n",__FUNCTION__); + + while (!kthread_should_stop()) { + wait_event_interruptible( + info->wq, + info->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + info->waiting_reqs = 0; + smp_mb(); + + if (scsiback_do_cmd_fn(info)) + info->waiting_reqs = 1; + } + + return 0; +} + + +static int __init scsiback_init(void) +{ + int i, mmap_pages; + + if (!is_running_on_xen()) + return -ENODEV; + + mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE; + + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + vscsiif_reqs, GFP_KERNEL); + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + + if (!pending_reqs || !pending_grant_handles || !pending_pages) + goto out_of_memory; + + for (i = 0; i < mmap_pages; i++) + pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE; + + if (scsiback_interface_init() < 0) + goto out_of_kmem; + + memset(pending_reqs, 0, sizeof(pending_reqs)); + INIT_LIST_HEAD(&pending_free); + + for (i = 0; i < vscsiif_reqs; i++) + list_add_tail(&pending_reqs[i].free_list, &pending_free); + + if (scsiback_xenbus_init()) + goto out_of_xenbus; + + scsiback_emulation_init(); + + return 0; + +out_of_xenbus: + scsiback_xenbus_unregister(); +out_of_kmem: + scsiback_interface_exit(); +out_of_memory: + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, mmap_pages); + printk(KERN_ERR "scsiback: %s: out of memory\n", __FUNCTION__); + return -ENOMEM; +} + +#if 0 +static void __exit scsiback_exit(void) +{ + scsiback_xenbus_unregister(); + scsiback_interface_exit(); + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * VSCSIIF_SG_TABLESIZE)); + +} +#endif + +module_init(scsiback_init); + +#if 0 +module_exit(scsiback_exit); +#endif + +MODULE_DESCRIPTION("Xen SCSI backend driver"); +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/scsiback/xenbus.c +++ linux-ec2-2.6.31/drivers/xen/scsiback/xenbus.c @@ -0,0 +1,377 @@ +/* + * Xen SCSI backend driver + * + * Copyright (c) 2008, FUJITSU Limited + * + * Based on the blkback driver code. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "common.h" + +struct backend_info +{ + struct xenbus_device *dev; + struct vscsibk_info *info; +}; + + +static int __vscsiif_name(struct backend_info *be, char *buf) +{ + struct xenbus_device *dev = be->dev; + unsigned int domid, id; + + sscanf(dev->nodename, "backend/vscsi/%u/%u", &domid, &id); + snprintf(buf, TASK_COMM_LEN, "vscsi.%u.%u", be->info->domid, id); + + return 0; +} + +static int scsiback_map(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + int err; + char name[TASK_COMM_LEN]; + + err = xenbus_gather(XBT_NIL, dev->otherend, + "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend); + return err; + } + + err = scsiback_init_sring(be->info, ring_ref, evtchn); + if (err) + return err; + + err = __vscsiif_name(be, name); + if (err) { + xenbus_dev_error(dev, err, "get scsiback dev name"); + return err; + } + + be->info->kthread = kthread_run(scsiback_schedule, be->info, name); + if (IS_ERR(be->info->kthread)) { + err = PTR_ERR(be->info->kthread); + be->info->kthread = NULL; + xenbus_dev_error(be->dev, err, "start vscsiif"); + return err; + } + + return 0; +} + + +struct scsi_device *scsiback_get_scsi_device(struct ids_tuple *phy) +{ + struct Scsi_Host *shost; + struct scsi_device *sdev = NULL; + + shost = scsi_host_lookup(phy->hst); + if (IS_ERR(shost)) { + printk(KERN_ERR "scsiback: host%d doesn't exist.\n", + phy->hst); + return NULL; + } + sdev = scsi_device_lookup(shost, phy->chn, phy->tgt, phy->lun); + if (!sdev) { + printk(KERN_ERR "scsiback: %d:%d:%d:%d doesn't exist.\n", + phy->hst, phy->chn, phy->tgt, phy->lun); + scsi_host_put(shost); + return NULL; + } + + scsi_host_put(shost); + return (sdev); +} + +#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1 +#define VSCSIBACK_OP_UPDATEDEV_STATE 2 + + +static void scsiback_do_lun_hotplug(struct backend_info *be, int op) +{ + int i, err = 0; + struct ids_tuple phy, vir; + int device_state; + char str[64], state_str[64]; + char **dir; + 
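+	/*
+	 * Scan the vscsi-devs subtree of this backend node.  The layout
+	 * (with an illustrative device number) looks like:
+	 *
+	 *   backend/vscsi/<domid>/<handle>/vscsi-devs/dev-0/p-dev = "0:0:1:0"
+	 *   backend/vscsi/<domid>/<handle>/vscsi-devs/dev-0/v-dev = "0:0:0:0"
+	 *   backend/vscsi/<domid>/<handle>/vscsi-devs/dev-0/state = "1"
+	 */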
unsigned int dir_n = 0; + struct xenbus_device *dev = be->dev; + struct scsi_device *sdev; + + dir = xenbus_directory(XBT_NIL, dev->nodename, "vscsi-devs", &dir_n); + if (IS_ERR(dir)) + return; + + for (i = 0; i < dir_n; i++) { + + /* read status */ + snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]); + err = xenbus_scanf(XBT_NIL, dev->nodename, state_str, "%u", + &device_state); + if (XENBUS_EXIST_ERR(err)) + continue; + + /* physical SCSI device */ + snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", dir[i]); + err = xenbus_scanf(XBT_NIL, dev->nodename, str, + "%u:%u:%u:%u", &phy.hst, &phy.chn, &phy.tgt, &phy.lun); + if (XENBUS_EXIST_ERR(err)) { + xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateClosed); + continue; + } + + /* virtual SCSI device */ + snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]); + err = xenbus_scanf(XBT_NIL, dev->nodename, str, + "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun); + if (XENBUS_EXIST_ERR(err)) { + xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateClosed); + continue; + } + + switch (op) { + case VSCSIBACK_OP_ADD_OR_DEL_LUN: + if (device_state == XenbusStateInitialising) { + sdev = scsiback_get_scsi_device(&phy); + if (!sdev) + xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateClosed); + else { + err = scsiback_add_translation_entry(be->info, sdev, &vir); + if (!err) { + if (xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateInitialised)) { + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); + scsiback_del_translation_entry(be->info, &vir); + } + } else { + scsi_device_put(sdev); + xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateClosed); + } + } + } + + if (device_state == XenbusStateClosing) { + if (!scsiback_del_translation_entry(be->info, &vir)) { + if (xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateClosed)) + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); + } + } + break; + + case VSCSIBACK_OP_UPDATEDEV_STATE: + if (device_state == XenbusStateInitialised) { + /* modify vscsi-devs/dev-x/state */ + if (xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateConnected)) { + printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); + scsiback_del_translation_entry(be->info, &vir); + xenbus_printf(XBT_NIL, dev->nodename, state_str, + "%d", XenbusStateClosed); + } + } + break; + /*When it is necessary, processing is added here.*/ + default: + break; + } + } + + kfree(dir); + return ; +} + + +static void scsiback_frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + int err; + + switch (frontend_state) { + case XenbusStateInitialising: + break; + case XenbusStateInitialised: + err = scsiback_map(be); + if (err) + break; + + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN); + xenbus_switch_state(dev, XenbusStateConnected); + + break; + case XenbusStateConnected: + + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_UPDATEDEV_STATE); + + if (dev->state == XenbusStateConnected) + break; + + xenbus_switch_state(dev, XenbusStateConnected); + + break; + + case XenbusStateClosing: + scsiback_disconnect(be->info); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev); + 
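+		/*
+		 * Unregistering the xenbus device triggers scsiback_remove(),
+		 * which performs the actual teardown.
+		 */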
break; + + case XenbusStateReconfiguring: + scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN); + + xenbus_switch_state(dev, XenbusStateReconfigured); + + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +static int scsiback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + if (be->info) { + scsiback_disconnect(be->info); + scsiback_release_translation_entry(be->info); + scsiback_free(be->info); + be->info = NULL; + } + + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + + return 0; +} + + +static int scsiback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + unsigned val = 0; + + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + + DPRINTK("%p %d\n", dev, dev->otherend_id); + + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + be->dev = dev; + dev_set_drvdata(&dev->dev, be); + + be->info = vscsibk_info_alloc(dev->otherend_id); + if (IS_ERR(be->info)) { + err = PTR_ERR(be->info); + be->info = NULL; + xenbus_dev_fatal(dev, err, "creating scsihost interface"); + goto fail; + } + + be->info->dev = dev; + be->info->irq = 0; + be->info->feature = 0; /*default not HOSTMODE.*/ + + scsiback_init_translation_table(be->info); + + err = xenbus_scanf(XBT_NIL, dev->nodename, + "feature-host", "%d", &val); + if (XENBUS_EXIST_ERR(err)) + val = 0; + + if (val) + be->info->feature = VSCSI_TYPE_HOST; + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; + + return 0; + + +fail: + printk(KERN_WARNING "scsiback: %s failed\n",__FUNCTION__); + scsiback_remove(dev); + + return err; +} + + +static struct xenbus_device_id scsiback_ids[] = { + { "vscsi" }, + { "" } +}; + +static struct xenbus_driver scsiback = { + .name = "vscsi", + .ids = scsiback_ids, + .probe = scsiback_probe, + .remove = scsiback_remove, + .otherend_changed = scsiback_frontend_changed +}; + +int scsiback_xenbus_init(void) +{ + return xenbus_register_backend(&scsiback); +} + +void scsiback_xenbus_unregister(void) +{ + xenbus_unregister_driver(&scsiback); +} --- linux-ec2-2.6.31.orig/drivers/xen/scsiback/translate.c +++ linux-ec2-2.6.31/drivers/xen/scsiback/translate.c @@ -0,0 +1,168 @@ +/* + * Xen SCSI backend driver + * + * Copyright (c) 2008, FUJITSU Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "common.h" + +/* + Initialize the translation entry list +*/ +void scsiback_init_translation_table(struct vscsibk_info *info) +{ + INIT_LIST_HEAD(&info->v2p_entry_lists); + spin_lock_init(&info->v2p_lock); +} + + +/* + Add a new translation entry +*/ +int scsiback_add_translation_entry(struct vscsibk_info *info, + struct scsi_device *sdev, struct ids_tuple *v) +{ + int err = 0; + struct v2p_entry *entry; + struct v2p_entry *new; + struct list_head *head = &(info->v2p_entry_lists); + unsigned long flags; + + spin_lock_irqsave(&info->v2p_lock, flags); + + /* Check double assignment to identical virtual ID */ + list_for_each_entry(entry, head, l) { + if ((entry->v.chn == v->chn) && + (entry->v.tgt == v->tgt) && + (entry->v.lun == v->lun)) { + printk(KERN_WARNING "scsiback: Virtual ID is already used. " + "Assignment was not performed.\n"); + err = -EEXIST; + goto out; + } + + } + + /* Create a new translation entry and add to the list */ + if ((new = kmalloc(sizeof(struct v2p_entry), GFP_ATOMIC)) == NULL) { + printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__); + err = -ENOMEM; + goto out; + } + new->v = *v; + new->sdev = sdev; + list_add_tail(&new->l, head); + +out: + spin_unlock_irqrestore(&info->v2p_lock, flags); + return err; +} + + +/* + Delete the translation entry specfied +*/ +int scsiback_del_translation_entry(struct vscsibk_info *info, + struct ids_tuple *v) +{ + struct v2p_entry *entry; + struct list_head *head = &(info->v2p_entry_lists); + unsigned long flags; + + spin_lock_irqsave(&info->v2p_lock, flags); + /* Find out the translation entry specified */ + list_for_each_entry(entry, head, l) { + if ((entry->v.chn == v->chn) && + (entry->v.tgt == v->tgt) && + (entry->v.lun == v->lun)) { + goto found; + } + } + + spin_unlock_irqrestore(&info->v2p_lock, flags); + return 1; + +found: + /* Delete the translation entry specfied */ + scsi_device_put(entry->sdev); + list_del(&entry->l); + kfree(entry); + + spin_unlock_irqrestore(&info->v2p_lock, flags); + return 0; +} + + +/* + Perform virtual to physical translation +*/ +struct scsi_device *scsiback_do_translation(struct vscsibk_info *info, + struct ids_tuple *v) +{ + struct v2p_entry *entry; + struct list_head *head = &(info->v2p_entry_lists); + struct scsi_device *sdev = NULL; + unsigned long flags; + + spin_lock_irqsave(&info->v2p_lock, flags); + list_for_each_entry(entry, head, l) { + if ((entry->v.chn == v->chn) && + (entry->v.tgt == v->tgt) && + (entry->v.lun == v->lun)) { + sdev = entry->sdev; + goto out; + } + } +out: + spin_unlock_irqrestore(&info->v2p_lock, flags); + return sdev; +} + + +/* + Release the translation entry specfied +*/ +void scsiback_release_translation_entry(struct vscsibk_info *info) +{ + struct v2p_entry *entry, *tmp; + struct list_head *head = &(info->v2p_entry_lists); + unsigned long flags; + + spin_lock_irqsave(&info->v2p_lock, flags); + list_for_each_entry_safe(entry, tmp, head, l) { + scsi_device_put(entry->sdev); + list_del(&entry->l); + kfree(entry); + } + + spin_unlock_irqrestore(&info->v2p_lock, flags); + return; + +} --- linux-ec2-2.6.31.orig/drivers/xen/scsiback/emulate.c +++ linux-ec2-2.6.31/drivers/xen/scsiback/emulate.c @@ -0,0 +1,473 @@ +/* + * Xen SCSI backend driver + * 
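+ * Per-op_code emulation layer: SCSI commands that cannot simply be
+ * passed through to the physical device are intercepted here.  REPORT
+ * LUNS is fully emulated from the v2p translation table; op_codes that
+ * are neither emulated nor marked NO_EMULATE are rejected with
+ * ILLEGAL REQUEST / INVALID_OPCODE sense data.
+ *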
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include "common.h"
+
+/* Following SCSI commands are not defined in scsi/scsi.h */
+#define EXTENDED_COPY		0x83	/* EXTENDED COPY command */
+#define REPORT_ALIASES		0xa3	/* REPORT ALIASES command */
+#define CHANGE_ALIASES		0xa4	/* CHANGE ALIASES command */
+#define SET_PRIORITY		0xa4	/* SET PRIORITY command */
+
+
+/*
+  The bitmap used to control emulation.
+  (Bits 3 to 7 are reserved for future use.)
+*/
+#define VSCSIIF_NEED_CMD_EXEC		0x01	/* If this bit is set, cmd exec	  */
+						/* is required.			  */
+#define VSCSIIF_NEED_EMULATE_REQBUF	0x02	/* If this bit is set, need	  */
+						/* emulation of the request	  */
+						/* buffer before cmd exec.	  */
+#define VSCSIIF_NEED_EMULATE_RSPBUF	0x04	/* If this bit is set, need	  */
+						/* emulation of the response	  */
+						/* buffer after cmd exec.	  */
+
+/* Additional Sense Code (ASC) used */
+#define NO_ADDITIONAL_SENSE		0x0
+#define LOGICAL_UNIT_NOT_READY		0x4
+#define UNRECOVERED_READ_ERR		0x11
+#define PARAMETER_LIST_LENGTH_ERR	0x1a
+#define INVALID_OPCODE			0x20
+#define ADDR_OUT_OF_RANGE		0x21
+#define INVALID_FIELD_IN_CDB		0x24
+#define INVALID_FIELD_IN_PARAM_LIST	0x26
+#define POWERON_RESET			0x29
+#define SAVING_PARAMS_UNSUP		0x39
+#define THRESHOLD_EXCEEDED		0x5d
+#define LOW_POWER_COND_ON		0x5e
+
+
+
+/* Number of SCSI op_codes */
+#define VSCSI_MAX_SCSI_OP_CODE		256
+static unsigned char bitmap[VSCSI_MAX_SCSI_OP_CODE];
+
+#define NO_EMULATE(cmd) \
+	bitmap[cmd] = VSCSIIF_NEED_CMD_EXEC; \
+	pre_function[cmd] = NULL; \
+	post_function[cmd] = NULL
+
+
+
+/*
+  Emulation routines for each SCSI op_code.
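+
+  Dispatch is driven by three per-op_code tables: if
+  bitmap[op] & VSCSIIF_NEED_EMULATE_REQBUF, pre_function[op] runs before
+  (or instead of) the native command; VSCSIIF_NEED_CMD_EXEC still issues
+  the command to the real device; and VSCSIIF_NEED_EMULATE_RSPBUF runs
+  post_function[op] on completion.  REPORT LUNS, for example, is fully
+  emulated: its pre_function builds the LUN list from the v2p table and
+  no native command is executed.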
+*/ +static void (*pre_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *); +static void (*post_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *); + + +static const int check_condition_result = + (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; + +static void scsiback_mk_sense_buffer(uint8_t *data, uint8_t key, + uint8_t asc, uint8_t asq) +{ + data[0] = 0x70; /* fixed, current */ + data[2] = key; + data[7] = 0xa; /* implies 18 byte sense buffer */ + data[12] = asc; + data[13] = asq; +} + +static void resp_not_supported_cmd(pending_req_t *pending_req, void *data) +{ + scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST, + INVALID_OPCODE, 0); + pending_req->resid = 0; + pending_req->rslt = check_condition_result; +} + + +static int __copy_to_sg(struct scatterlist *sgl, unsigned int nr_sg, + void *buf, unsigned int buflen) +{ + struct scatterlist *sg; + void *from = buf; + void *to; + unsigned int from_rest = buflen; + unsigned int to_capa; + unsigned int copy_size = 0; + unsigned int i; + unsigned long pfn; + + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { + printk(KERN_WARNING "%s: inconsistent length field in " + "scatterlist\n", __FUNCTION__); + return -ENOMEM; + } + + to_capa = sg->length; + copy_size = min_t(unsigned int, to_capa, from_rest); + + pfn = page_to_pfn(sg_page(sg)); + to = pfn_to_kaddr(pfn) + (sg->offset); + memcpy(to, from, copy_size); + + from_rest -= copy_size; + if (from_rest == 0) { + return 0; + } + + from += copy_size; + } + + printk(KERN_WARNING "%s: no space in scatterlist\n", + __FUNCTION__); + return -ENOMEM; +} + +static int __copy_from_sg(struct scatterlist *sgl, unsigned int nr_sg, + void *buf, unsigned int buflen) +{ + struct scatterlist *sg; + void *from; + void *to = buf; + unsigned int from_rest; + unsigned int to_capa = buflen; + unsigned int copy_size; + unsigned int i; + unsigned long pfn; + + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { + printk(KERN_WARNING "%s: inconsistent length field in " + "scatterlist\n", __FUNCTION__); + return -ENOMEM; + } + + from_rest = sg->length; + if ((from_rest > 0) && (to_capa < from_rest)) { + printk(KERN_WARNING + "%s: no space in destination buffer\n", + __FUNCTION__); + return -ENOMEM; + } + copy_size = from_rest; + + pfn = page_to_pfn(sg_page(sg)); + from = pfn_to_kaddr(pfn) + (sg->offset); + memcpy(to, from, copy_size); + + to_capa -= copy_size; + to += copy_size; + } + + return 0; +} + +static int __nr_luns_under_host(struct vscsibk_info *info) +{ + struct v2p_entry *entry; + struct list_head *head = &(info->v2p_entry_lists); + unsigned long flags; + int lun_cnt = 0; + + spin_lock_irqsave(&info->v2p_lock, flags); + list_for_each_entry(entry, head, l) { + lun_cnt++; + } + spin_unlock_irqrestore(&info->v2p_lock, flags); + + return (lun_cnt); +} + + +/* REPORT LUNS Define*/ +#define VSCSI_REPORT_LUNS_HEADER 8 +#define VSCSI_REPORT_LUNS_RETRY 3 + +/* quoted scsi_debug.c/resp_report_luns() */ +static void __report_luns(pending_req_t *pending_req, void *data) +{ + struct vscsibk_info *info = pending_req->info; + unsigned int channel = pending_req->v_chn; + unsigned int target = pending_req->v_tgt; + unsigned int nr_seg = pending_req->nr_segments; + unsigned char *cmd = (unsigned char *)pending_req->cmnd; + + unsigned char *buff = NULL; + unsigned char alloc_len; + unsigned int alloc_luns = 0; + unsigned int req_bufflen = 0; + unsigned int actual_len = 0; + unsigned int retry_cnt = 0; + int select_report = (int)cmd[2]; + int i, lun_cnt = 0, lun, upper, 
err = 0; + + struct v2p_entry *entry; + struct list_head *head = &(info->v2p_entry_lists); + unsigned long flags; + + struct scsi_lun *one_lun; + + req_bufflen = cmd[9] + (cmd[8] << 8) + (cmd[7] << 16) + (cmd[6] << 24); + if ((req_bufflen < 4) || (select_report != 0)) + goto fail; + + alloc_luns = __nr_luns_under_host(info); + alloc_len = sizeof(struct scsi_lun) * alloc_luns + + VSCSI_REPORT_LUNS_HEADER; +retry: + if ((buff = kmalloc(alloc_len, GFP_KERNEL)) == NULL) { + printk(KERN_ERR "scsiback:%s kmalloc err\n", __FUNCTION__); + goto fail; + } + + memset(buff, 0, alloc_len); + + one_lun = (struct scsi_lun *) &buff[8]; + spin_lock_irqsave(&info->v2p_lock, flags); + list_for_each_entry(entry, head, l) { + if ((entry->v.chn == channel) && + (entry->v.tgt == target)) { + + /* check overflow */ + if (lun_cnt >= alloc_luns) { + spin_unlock_irqrestore(&info->v2p_lock, + flags); + + if (retry_cnt < VSCSI_REPORT_LUNS_RETRY) { + retry_cnt++; + if (buff) + kfree(buff); + goto retry; + } + + goto fail; + } + + lun = entry->v.lun; + upper = (lun >> 8) & 0x3f; + if (upper) + one_lun[lun_cnt].scsi_lun[0] = upper; + one_lun[lun_cnt].scsi_lun[1] = lun & 0xff; + lun_cnt++; + } + } + + spin_unlock_irqrestore(&info->v2p_lock, flags); + + buff[2] = ((sizeof(struct scsi_lun) * lun_cnt) >> 8) & 0xff; + buff[3] = (sizeof(struct scsi_lun) * lun_cnt) & 0xff; + + actual_len = lun_cnt * sizeof(struct scsi_lun) + + VSCSI_REPORT_LUNS_HEADER; + req_bufflen = 0; + for (i = 0; i < nr_seg; i++) + req_bufflen += pending_req->sgl[i].length; + + err = __copy_to_sg(pending_req->sgl, nr_seg, buff, + min(req_bufflen, actual_len)); + if (err) + goto fail; + + memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE); + pending_req->rslt = 0x00; + pending_req->resid = req_bufflen - min(req_bufflen, actual_len); + + kfree(buff); + return; + +fail: + scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST, + INVALID_FIELD_IN_CDB, 0); + pending_req->rslt = check_condition_result; + pending_req->resid = 0; + if (buff) + kfree(buff); + return; +} + + + +int __pre_do_emulation(pending_req_t *pending_req, void *data) +{ + uint8_t op_code = pending_req->cmnd[0]; + + if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_REQBUF) && + pre_function[op_code] != NULL) { + pre_function[op_code](pending_req, data); + } + + /* + 0: no need for native driver call, so should return immediately. + 1: non emulation or should call native driver + after modifing the request buffer. + */ + return !!(bitmap[op_code] & VSCSIIF_NEED_CMD_EXEC); +} + +void scsiback_rsp_emulation(pending_req_t *pending_req) +{ + uint8_t op_code = pending_req->cmnd[0]; + + if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_RSPBUF) && + post_function[op_code] != NULL) { + post_function[op_code](pending_req, NULL); + } + + return; +} + + +void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req) +{ + if (__pre_do_emulation(pending_req, NULL)) { + scsiback_cmd_exec(pending_req); + } + else { + scsiback_fast_flush_area(pending_req); + scsiback_do_resp_with_sense(pending_req->sense_buffer, + pending_req->rslt, pending_req->resid, pending_req); + } +} + + +/* + Following are not customizable functions. 
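+
+  To hook up emulation for another op_code, register it inside
+  scsiback_emulation_init() below.  A hypothetical sketch (the fixup
+  function named here is not part of this driver):
+
+	bitmap[MODE_SENSE] = (VSCSIIF_NEED_EMULATE_REQBUF |
+				VSCSIIF_NEED_CMD_EXEC);
+	pre_function[MODE_SENSE] = my_mode_sense_fixup;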
+*/ +void scsiback_emulation_init(void) +{ + int i; + + /* Initialize to default state */ + for (i = 0; i < VSCSI_MAX_SCSI_OP_CODE; i++) { + bitmap[i] = (VSCSIIF_NEED_EMULATE_REQBUF | + VSCSIIF_NEED_EMULATE_RSPBUF); + pre_function[i] = resp_not_supported_cmd; + post_function[i] = NULL; + /* means, + - no need for pre-emulation + - no need for post-emulation + - call native driver + */ + } + + /* + Register appropriate functions below as you need. + (See scsi/scsi.h for definition of SCSI op_code.) + */ + + /* + Following commands do not require emulation. + */ + NO_EMULATE(TEST_UNIT_READY); /*0x00*/ + NO_EMULATE(REZERO_UNIT); /*0x01*/ + NO_EMULATE(REQUEST_SENSE); /*0x03*/ + NO_EMULATE(FORMAT_UNIT); /*0x04*/ + NO_EMULATE(READ_BLOCK_LIMITS); /*0x05*/ + /*NO_EMULATE(REASSIGN_BLOCKS); *//*0x07*/ + /*NO_EMULATE(INITIALIZE_ELEMENT_STATUS); *//*0x07*/ + NO_EMULATE(READ_6); /*0x08*/ + NO_EMULATE(WRITE_6); /*0x0a*/ + /*NO_EMULATE(SEEK_6); *//*0x0b*/ + /*NO_EMULATE(READ_REVERSE); *//*0x0f*/ + NO_EMULATE(WRITE_FILEMARKS); /*0x10*/ + NO_EMULATE(SPACE); /*0x11*/ + NO_EMULATE(INQUIRY); /*0x12*/ + /*NO_EMULATE(RECOVER_BUFFERED_DATA); *//*0x14*/ + /*NO_EMULATE(MODE_SELECT); *//*0x15*/ + /*NO_EMULATE(RESERVE); *//*0x16*/ + /*NO_EMULATE(RELEASE); *//*0x17*/ + /*NO_EMULATE(COPY); *//*0x18*/ + NO_EMULATE(ERASE); /*0x19*/ + NO_EMULATE(MODE_SENSE); /*0x1a*/ + /*NO_EMULATE(START_STOP); *//*0x1b*/ + /*NO_EMULATE(RECEIVE_DIAGNOSTIC); *//*0x1c*/ + NO_EMULATE(SEND_DIAGNOSTIC); /*0x1d*/ + /*NO_EMULATE(ALLOW_MEDIUM_REMOVAL); *//*0x1e*/ + + /*NO_EMULATE(SET_WINDOW); *//*0x24*/ + NO_EMULATE(READ_CAPACITY); /*0x25*/ + NO_EMULATE(READ_10); /*0x28*/ + NO_EMULATE(WRITE_10); /*0x2a*/ + /*NO_EMULATE(SEEK_10); *//*0x2b*/ + /*NO_EMULATE(POSITION_TO_ELEMENT); *//*0x2b*/ + /*NO_EMULATE(WRITE_VERIFY); *//*0x2e*/ + /*NO_EMULATE(VERIFY); *//*0x2f*/ + /*NO_EMULATE(SEARCH_HIGH); *//*0x30*/ + /*NO_EMULATE(SEARCH_EQUAL); *//*0x31*/ + /*NO_EMULATE(SEARCH_LOW); *//*0x32*/ + /*NO_EMULATE(SET_LIMITS); *//*0x33*/ + /*NO_EMULATE(PRE_FETCH); *//*0x34*/ + /*NO_EMULATE(READ_POSITION); *//*0x34*/ + /*NO_EMULATE(SYNCHRONIZE_CACHE); *//*0x35*/ + /*NO_EMULATE(LOCK_UNLOCK_CACHE); *//*0x36*/ + /*NO_EMULATE(READ_DEFECT_DATA); *//*0x37*/ + /*NO_EMULATE(MEDIUM_SCAN); *//*0x38*/ + /*NO_EMULATE(COMPARE); *//*0x39*/ + /*NO_EMULATE(COPY_VERIFY); *//*0x3a*/ + /*NO_EMULATE(WRITE_BUFFER); *//*0x3b*/ + /*NO_EMULATE(READ_BUFFER); *//*0x3c*/ + /*NO_EMULATE(UPDATE_BLOCK); *//*0x3d*/ + /*NO_EMULATE(READ_LONG); *//*0x3e*/ + /*NO_EMULATE(WRITE_LONG); *//*0x3f*/ + /*NO_EMULATE(CHANGE_DEFINITION); *//*0x40*/ + /*NO_EMULATE(WRITE_SAME); *//*0x41*/ + /*NO_EMULATE(READ_TOC); *//*0x43*/ + /*NO_EMULATE(LOG_SELECT); *//*0x4c*/ + /*NO_EMULATE(LOG_SENSE); *//*0x4d*/ + /*NO_EMULATE(MODE_SELECT_10); *//*0x55*/ + /*NO_EMULATE(RESERVE_10); *//*0x56*/ + /*NO_EMULATE(RELEASE_10); *//*0x57*/ + /*NO_EMULATE(MODE_SENSE_10); *//*0x5a*/ + /*NO_EMULATE(PERSISTENT_RESERVE_IN); *//*0x5e*/ + /*NO_EMULATE(PERSISTENT_RESERVE_OUT); *//*0x5f*/ + /* REPORT_LUNS *//*0xa0*//*Full emulaiton*/ + /*NO_EMULATE(MOVE_MEDIUM); *//*0xa5*/ + /*NO_EMULATE(EXCHANGE_MEDIUM); *//*0xa6*/ + /*NO_EMULATE(READ_12); *//*0xa8*/ + /*NO_EMULATE(WRITE_12); *//*0xaa*/ + /*NO_EMULATE(WRITE_VERIFY_12); *//*0xae*/ + /*NO_EMULATE(SEARCH_HIGH_12); *//*0xb0*/ + /*NO_EMULATE(SEARCH_EQUAL_12); *//*0xb1*/ + /*NO_EMULATE(SEARCH_LOW_12); *//*0xb2*/ + /*NO_EMULATE(READ_ELEMENT_STATUS); *//*0xb8*/ + /*NO_EMULATE(SEND_VOLUME_TAG); *//*0xb6*/ + /*NO_EMULATE(WRITE_LONG_2); *//*0xea*/ + /*NO_EMULATE(READ_16); *//*0x88*/ + 
/*NO_EMULATE(WRITE_16); *//*0x8a*/ + /*NO_EMULATE(VERIFY_16); *//*0x8f*/ + /*NO_EMULATE(SERVICE_ACTION_IN); *//*0x9e*/ + + /* + Following commands require emulation. + */ + pre_function[REPORT_LUNS] = __report_luns; + bitmap[REPORT_LUNS] = (VSCSIIF_NEED_EMULATE_REQBUF | + VSCSIIF_NEED_EMULATE_RSPBUF); + + return; +} --- linux-ec2-2.6.31.orig/drivers/xen/scsiback/Makefile +++ linux-ec2-2.6.31/drivers/xen/scsiback/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_XEN_SCSI_BACKEND) := xen-scsibk.o + +xen-scsibk-y := interface.o scsiback.o xenbus.o translate.o emulate.o + --- linux-ec2-2.6.31.orig/drivers/xen/scsiback/common.h +++ linux-ec2-2.6.31/drivers/xen/scsiback/common.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2008, FUJITSU Limited + * + * Based on the blkback driver code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __SCSIIF__BACKEND__COMMON_H__ +#define __SCSIIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define DPRINTK(_f, _a...) 
\ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +struct ids_tuple { + unsigned int hst; /* host */ + unsigned int chn; /* channel */ + unsigned int tgt; /* target */ + unsigned int lun; /* LUN */ +}; + +struct v2p_entry { + struct ids_tuple v; /* translate from */ + struct scsi_device *sdev; /* translate to */ + struct list_head l; +}; + +struct vscsibk_info { + struct xenbus_device *dev; + + domid_t domid; + unsigned int evtchn; + unsigned int irq; + + int feature; + + struct vscsiif_back_ring ring; + struct vm_struct *ring_area; + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; + + spinlock_t ring_lock; + atomic_t nr_unreplied_reqs; + + spinlock_t v2p_lock; + struct list_head v2p_entry_lists; + + struct task_struct *kthread; + wait_queue_head_t waiting_to_free; + wait_queue_head_t wq; + unsigned int waiting_reqs; + struct page **mmap_pages; + +}; + +typedef struct { + unsigned char act; + struct vscsibk_info *info; + struct scsi_device *sdev; + + uint16_t rqid; + + uint16_t v_chn, v_tgt; + + uint8_t nr_segments; + uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; + uint8_t cmd_len; + + uint8_t sc_data_direction; + uint16_t timeout_per_command; + + uint32_t request_bufflen; + struct scatterlist *sgl; + grant_ref_t gref[VSCSIIF_SG_TABLESIZE]; + + int32_t rslt; + uint32_t resid; + uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; + + struct list_head free_list; +} pending_req_t; + + + +#define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs)) +#define scsiback_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->nr_unreplied_reqs)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + +#define VSCSIIF_TIMEOUT (900*HZ) + +#define VSCSI_TYPE_HOST 1 + +irqreturn_t scsiback_intr(int, void *); +int scsiback_init_sring(struct vscsibk_info *info, + unsigned long ring_ref, unsigned int evtchn); +int scsiback_schedule(void *data); + + +struct vscsibk_info *vscsibk_info_alloc(domid_t domid); +void scsiback_free(struct vscsibk_info *info); +void scsiback_disconnect(struct vscsibk_info *info); +int __init scsiback_interface_init(void); +void scsiback_interface_exit(void); +int scsiback_xenbus_init(void); +void scsiback_xenbus_unregister(void); + +void scsiback_init_translation_table(struct vscsibk_info *info); + +int scsiback_add_translation_entry(struct vscsibk_info *info, + struct scsi_device *sdev, struct ids_tuple *v); + +int scsiback_del_translation_entry(struct vscsibk_info *info, + struct ids_tuple *v); +struct scsi_device *scsiback_do_translation(struct vscsibk_info *info, + struct ids_tuple *v); +void scsiback_release_translation_entry(struct vscsibk_info *info); + + +void scsiback_cmd_exec(pending_req_t *pending_req); +void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result, + uint32_t resid, pending_req_t *pending_req); +void scsiback_fast_flush_area(pending_req_t *req); + +void scsiback_rsp_emulation(pending_req_t *pending_req); +void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req); +void scsiback_emulation_init(void); + + +#endif /* __SCSIIF__BACKEND__COMMON_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/scsifront/scsifront.c +++ linux-ec2-2.6.31/drivers/xen/scsifront/scsifront.c @@ -0,0 +1,478 @@ +/* + * Xen SCSI frontend driver + * + * Copyright (c) 2008, FUJITSU Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into 
other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include +#include "common.h" + +static int get_id_from_freelist(struct vscsifrnt_info *info) +{ + unsigned long flags; + uint32_t free; + + spin_lock_irqsave(&info->shadow_lock, flags); + + free = info->shadow_free; + BUG_ON(free > VSCSIIF_MAX_REQS); + info->shadow_free = info->shadow[free].next_free; + info->shadow[free].next_free = 0x0fff; + + info->shadow[free].wait_reset = 0; + + spin_unlock_irqrestore(&info->shadow_lock, flags); + + return free; +} + +static void add_id_to_freelist(struct vscsifrnt_info *info, uint32_t id) +{ + unsigned long flags; + + spin_lock_irqsave(&info->shadow_lock, flags); + + info->shadow[id].next_free = info->shadow_free; + info->shadow[id].req_scsi_cmnd = 0; + info->shadow_free = id; + + spin_unlock_irqrestore(&info->shadow_lock, flags); +} + + +struct vscsiif_request * scsifront_pre_request(struct vscsifrnt_info *info) +{ + struct vscsiif_front_ring *ring = &(info->ring); + vscsiif_request_t *ring_req; + uint32_t id; + + ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt); + + ring->req_prod_pvt++; + + id = get_id_from_freelist(info); /* use id by response */ + ring_req->rqid = (uint16_t)id; + + return ring_req; +} + + +static void scsifront_notify_work(struct vscsifrnt_info *info) +{ + info->waiting_resp = 1; + wake_up(&info->wq); +} + + +static void scsifront_do_request(struct vscsifrnt_info *info) +{ + struct vscsiif_front_ring *ring = &(info->ring); + unsigned int irq = info->irq; + int notify; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify); + if (notify) + notify_remote_via_irq(irq); +} + +irqreturn_t scsifront_intr(int irq, void *dev_id) +{ + scsifront_notify_work((struct vscsifrnt_info *)dev_id); + return IRQ_HANDLED; +} + + +static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id) +{ + int i; + + if (s->sc_data_direction == DMA_NONE) + return; + + if (s->nr_segments) { + for (i = 0; i < s->nr_segments; i++) { + if (unlikely(gnttab_query_foreign_access( + s->gref[i]) != 0)) { + printk(KERN_ALERT "scsifront: " + "grant still in use by backend.\n"); + BUG(); + } + gnttab_end_foreign_access(s->gref[i], 0UL); + } + } + + return; +} + + +static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info, + vscsiif_response_t *ring_res) +{ + struct scsi_cmnd *sc; + uint32_t id; + uint8_t sense_len; + + id = ring_res->rqid; + sc = (struct scsi_cmnd *)info->shadow[id].req_scsi_cmnd; + + if (sc == NULL) + BUG(); + 
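+ /* Editor's note (descriptive comment, restating what the code below does): + * the rqid echoed by the backend indexes the shadow ring; recover the + * original scsi_cmnd, release the data-buffer grants (scsifront_gnttab_done + * checks the backend no longer holds them), and only then recycle the id. */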
+ scsifront_gnttab_done(&info->shadow[id], id); + add_id_to_freelist(info, id); + + sc->result = ring_res->rslt; + scsi_set_resid(sc, ring_res->residual_len); + + if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE) + sense_len = VSCSIIF_SENSE_BUFFERSIZE; + else + sense_len = ring_res->sense_len; + + if (sense_len) + memcpy(sc->sense_buffer, ring_res->sense_buffer, sense_len); + + sc->scsi_done(sc); + + return; +} + + +static void scsifront_sync_cmd_done(struct vscsifrnt_info *info, + vscsiif_response_t *ring_res) +{ + uint16_t id = ring_res->rqid; + unsigned long flags; + + spin_lock_irqsave(&info->shadow_lock, flags); + info->shadow[id].wait_reset = 1; + info->shadow[id].rslt_reset = ring_res->rslt; + spin_unlock_irqrestore(&info->shadow_lock, flags); + + wake_up(&(info->shadow[id].wq_reset)); +} + + +int scsifront_cmd_done(struct vscsifrnt_info *info) +{ + vscsiif_response_t *ring_res; + + RING_IDX i, rp; + int more_to_do = 0; + unsigned long flags; + + spin_lock_irqsave(&info->io_lock, flags); + + rp = info->ring.sring->rsp_prod; + rmb(); + for (i = info->ring.rsp_cons; i != rp; i++) { + + ring_res = RING_GET_RESPONSE(&info->ring, i); + + if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB) + scsifront_cdb_cmd_done(info, ring_res); + else + scsifront_sync_cmd_done(info, ring_res); + } + + info->ring.rsp_cons = i; + + if (i != info->ring.req_prod_pvt) { + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); + } else { + info->ring.sring->rsp_event = i + 1; + } + + spin_unlock_irqrestore(&info->io_lock, flags); + + + /* Yield point for this unbounded loop. */ + cond_resched(); + + return more_to_do; +} + + + + +int scsifront_schedule(void *data) +{ + struct vscsifrnt_info *info = (struct vscsifrnt_info *)data; + + while (!kthread_should_stop()) { + wait_event_interruptible( + info->wq, + info->waiting_resp || kthread_should_stop()); + + info->waiting_resp = 0; + smp_mb(); + + if (scsifront_cmd_done(info)) + info->waiting_resp = 1; + } + + return 0; +} + + + +static int map_data_for_request(struct vscsifrnt_info *info, + struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id) +{ + grant_ref_t gref_head; + struct page *page; + int err, ref, ref_cnt = 0; + int write = (sc->sc_data_direction == DMA_TO_DEVICE); + unsigned int i, nr_pages, off, len, bytes; + unsigned long buffer_pfn; + + if (sc->sc_data_direction == DMA_NONE) + return 0; + + err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head); + if (err) { + printk(KERN_ERR "scsifront: gnttab_alloc_grant_references() error\n"); + return -ENOMEM; + } + + if (scsi_bufflen(sc)) { + /* quoted scsi_lib.c/scsi_req_map_sg . 
*/ + struct scatterlist *sg, *sgl = scsi_sglist(sc); + unsigned int data_len = scsi_bufflen(sc); + + nr_pages = (data_len + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (nr_pages > VSCSIIF_SG_TABLESIZE) { + printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n"); + ref_cnt = (-E2BIG); + goto big_to_sg; + } + + for_each_sg (sgl, sg, scsi_sg_count(sc), i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; + + buffer_pfn = page_to_phys(page) >> PAGE_SHIFT; + + while (len > 0 && data_len > 0) { + /* + * sg sends a scatterlist that is larger than + * the data_len it wants transferred for certain + * IO sizes + */ + bytes = min_t(unsigned int, len, PAGE_SIZE - off); + bytes = min(bytes, data_len); + + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); + + gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id, + buffer_pfn, write); + + info->shadow[id].gref[ref_cnt] = ref; + ring_req->seg[ref_cnt].gref = ref; + ring_req->seg[ref_cnt].offset = (uint16_t)off; + ring_req->seg[ref_cnt].length = (uint16_t)bytes; + + buffer_pfn++; + len -= bytes; + data_len -= bytes; + off = 0; + ref_cnt++; + } + } + } + +big_to_sg: + + gnttab_free_grant_references(gref_head); + + return ref_cnt; +} + +static int scsifront_queuecommand(struct scsi_cmnd *sc, + void (*done)(struct scsi_cmnd *)) +{ + struct vscsifrnt_info *info = + (struct vscsifrnt_info *) sc->device->host->hostdata; + vscsiif_request_t *ring_req; + int ref_cnt; + uint16_t rqid; + + if (RING_FULL(&info->ring)) { + goto out_host_busy; + } + + sc->scsi_done = done; + sc->result = 0; + + ring_req = scsifront_pre_request(info); + rqid = ring_req->rqid; + ring_req->act = VSCSIIF_ACT_SCSI_CDB; + + ring_req->id = sc->device->id; + ring_req->lun = sc->device->lun; + ring_req->channel = sc->device->channel; + ring_req->cmd_len = sc->cmd_len; + + BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE); + + if ( sc->cmd_len ) + memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len); + else + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE); + + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction; + ring_req->timeout_per_command = (sc->request->timeout / HZ); + + info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc; + info->shadow[rqid].sc_data_direction = sc->sc_data_direction; + info->shadow[rqid].act = ring_req->act; + + ref_cnt = map_data_for_request(info, sc, ring_req, rqid); + if (ref_cnt < 0) { + add_id_to_freelist(info, rqid); + if (ref_cnt == (-ENOMEM)) + goto out_host_busy; + else { + sc->result = (DID_ERROR << 16); + goto out_fail_command; + } + } + + ring_req->nr_segments = (uint8_t)ref_cnt; + info->shadow[rqid].nr_segments = ref_cnt; + + scsifront_do_request(info); + + return 0; + +out_host_busy: + return SCSI_MLQUEUE_HOST_BUSY; + +out_fail_command: + done(sc); + return 0; +} + + +static int scsifront_eh_abort_handler(struct scsi_cmnd *sc) +{ + return (FAILED); +} + +/* vscsi supports only device_reset, because it is each of LUNs */ +static int scsifront_dev_reset_handler(struct scsi_cmnd *sc) +{ + struct Scsi_Host *host = sc->device->host; + struct vscsifrnt_info *info = + (struct vscsifrnt_info *) sc->device->host->hostdata; + + vscsiif_request_t *ring_req; + uint16_t rqid; + int err; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) + spin_lock_irq(host->host_lock); +#endif + + ring_req = scsifront_pre_request(info); + ring_req->act = VSCSIIF_ACT_SCSI_RESET; + + rqid = ring_req->rqid; + info->shadow[rqid].act = VSCSIIF_ACT_SCSI_RESET; + + ring_req->channel = sc->device->channel; + 
ring_req->id = sc->device->id; + ring_req->lun = sc->device->lun; + ring_req->cmd_len = sc->cmd_len; + + if ( sc->cmd_len ) + memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len); + else + memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE); + + ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction; + ring_req->timeout_per_command = (sc->request->timeout / HZ); + ring_req->nr_segments = 0; + + scsifront_do_request(info); + + spin_unlock_irq(host->host_lock); + wait_event_interruptible(info->shadow[rqid].wq_reset, + info->shadow[rqid].wait_reset); + spin_lock_irq(host->host_lock); + + err = info->shadow[rqid].rslt_reset; + + add_id_to_freelist(info, rqid); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) + spin_unlock_irq(host->host_lock); +#endif + return (err); +} + + +struct scsi_host_template scsifront_sht = { + .module = THIS_MODULE, + .name = "Xen SCSI frontend driver", + .queuecommand = scsifront_queuecommand, + .eh_abort_handler = scsifront_eh_abort_handler, + .eh_device_reset_handler= scsifront_dev_reset_handler, + .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN, + .can_queue = VSCSIIF_MAX_REQS, + .this_id = -1, + .sg_tablesize = VSCSIIF_SG_TABLESIZE, + .use_clustering = DISABLE_CLUSTERING, + .proc_name = "scsifront", +}; + + +static int __init scsifront_init(void) +{ + int err; + + if (!is_running_on_xen()) + return -ENODEV; + + err = scsifront_xenbus_init(); + + return err; +} + +static void __exit scsifront_exit(void) +{ + scsifront_xenbus_unregister(); +} + +module_init(scsifront_init); +module_exit(scsifront_exit); + +MODULE_DESCRIPTION("Xen SCSI frontend driver"); +MODULE_LICENSE("GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/scsifront/xenbus.c +++ linux-ec2-2.6.31/drivers/xen/scsifront/xenbus.c @@ -0,0 +1,420 @@ +/* + * Xen SCSI frontend driver + * + * Copyright (c) 2008, FUJITSU Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + +#include +#include "common.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) + #define DEFAULT_TASK_COMM_LEN 16 +#else + #define DEFAULT_TASK_COMM_LEN TASK_COMM_LEN +#endif + +extern struct scsi_host_template scsifront_sht; + +static void scsifront_free(struct vscsifrnt_info *info) +{ + struct Scsi_Host *host = info->host; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) + if (host->shost_state != SHOST_DEL) { +#else + if (!test_bit(SHOST_DEL, &host->shost_state)) { +#endif + scsi_remove_host(info->host); + } + + if (info->ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref, + (unsigned long)info->ring.sring); + info->ring_ref = GRANT_INVALID_REF; + info->ring.sring = NULL; + } + + if (info->irq) + unbind_from_irqhandler(info->irq, info); + info->irq = 0; + + scsi_host_put(info->host); +} + + +static int scsifront_alloc_ring(struct vscsifrnt_info *info) +{ + struct xenbus_device *dev = info->dev; + struct vscsiif_sring *sring; + int err = -ENOMEM; + + + info->ring_ref = GRANT_INVALID_REF; + + /***** Frontend to Backend ring start *****/ + sring = (struct vscsiif_sring *) __get_free_page(GFP_KERNEL); + if (!sring) { + xenbus_dev_fatal(dev, err, "fail to allocate shared ring (Front to Back)"); + return err; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(sring)); + if (err < 0) { + free_page((unsigned long) sring); + info->ring.sring = NULL; + xenbus_dev_fatal(dev, err, "fail to grant shared ring (Front to Back)"); + goto free_sring; + } + info->ring_ref = err; + + err = bind_listening_port_to_irqhandler( + dev->otherend_id, scsifront_intr, + IRQF_SAMPLE_RANDOM, "scsifront", info); + + if (err <= 0) { + xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler"); + goto free_sring; + } + info->irq = err; + + return 0; + +/* free resource */ +free_sring: + scsifront_free(info); + + return err; +} + + +static int scsifront_init_ring(struct vscsifrnt_info *info) +{ + struct xenbus_device *dev = info->dev; + struct xenbus_transaction xbt; + int err; + + DPRINTK("%s\n",__FUNCTION__); + + err = scsifront_alloc_ring(info); + if (err) + return err; + DPRINTK("%u %u\n", info->ring_ref, info->evtchn); + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + } + + err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u", + info->ring_ref); + if (err) { + xenbus_dev_fatal(dev, err, "%s", "writing ring-ref"); + goto fail; + } + + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + irq_to_evtchn_port(info->irq)); + + if (err) { + xenbus_dev_fatal(dev, err, "%s", "writing event-channel"); + goto fail; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto free_sring; + } + + return 0; + +fail: + xenbus_transaction_end(xbt, 1); +free_sring: + /* free resource */ + scsifront_free(info); + + return err; +} + + +static int scsifront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct vscsifrnt_info *info; + struct Scsi_Host *host; + int i, err = -ENOMEM; + char name[DEFAULT_TASK_COMM_LEN]; + + host = scsi_host_alloc(&scsifront_sht, sizeof(*info)); + if (!host) { + xenbus_dev_fatal(dev, err, "fail to allocate scsi host"); + return err; + } + info = (struct vscsifrnt_info *) host->hostdata; + info->host = host; + + + dev_set_drvdata(&dev->dev, info); + info->dev = dev; + + for (i = 0; i 
< VSCSIIF_MAX_REQS; i++) { + info->shadow[i].next_free = i + 1; + init_waitqueue_head(&(info->shadow[i].wq_reset)); + info->shadow[i].wait_reset = 0; + } + info->shadow[VSCSIIF_MAX_REQS - 1].next_free = 0x0fff; + + err = scsifront_init_ring(info); + if (err) { + scsi_host_put(host); + return err; + } + + init_waitqueue_head(&info->wq); + spin_lock_init(&info->io_lock); + spin_lock_init(&info->shadow_lock); + + snprintf(name, DEFAULT_TASK_COMM_LEN, "vscsiif.%d", info->host->host_no); + + info->kthread = kthread_run(scsifront_schedule, info, name); + if (IS_ERR(info->kthread)) { + err = PTR_ERR(info->kthread); + info->kthread = NULL; + printk(KERN_ERR "scsifront: kthread start err %d\n", err); + goto free_sring; + } + + host->max_id = VSCSIIF_MAX_TARGET; + host->max_channel = 0; + host->max_lun = VSCSIIF_MAX_LUN; + host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512; + + err = scsi_add_host(host, &dev->dev); + if (err) { + printk(KERN_ERR "scsifront: fail to add scsi host %d\n", err); + goto free_sring; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + +free_sring: + /* free resource */ + scsifront_free(info); + return err; +} + +static int scsifront_remove(struct xenbus_device *dev) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + + DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename); + + if (info->kthread) { + kthread_stop(info->kthread); + info->kthread = NULL; + } + + scsifront_free(info); + + return 0; +} + + +static int scsifront_disconnect(struct vscsifrnt_info *info) +{ + struct xenbus_device *dev = info->dev; + struct Scsi_Host *host = info->host; + + DPRINTK("%s: %s disconnect\n",__FUNCTION__ ,dev->nodename); + + /* + When this function is executed, all devices of + Frontend have been deleted. + Therefore, it need not block I/O before remove_host. 
+ */ + + scsi_remove_host(host); + xenbus_frontend_closed(dev); + + return 0; +} + +#define VSCSIFRONT_OP_ADD_LUN 1 +#define VSCSIFRONT_OP_DEL_LUN 2 + +static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) +{ + struct xenbus_device *dev = info->dev; + int i, err = 0; + char str[64], state_str[64]; + char **dir; + unsigned int dir_n = 0; + unsigned int device_state; + unsigned int hst, chn, tgt, lun; + struct scsi_device *sdev; + + dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n); + if (IS_ERR(dir)) + return; + + for (i = 0; i < dir_n; i++) { + /* read status */ + snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]); + err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u", + &device_state); + if (XENBUS_EXIST_ERR(err)) + continue; + + /* virtual SCSI device */ + snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]); + err = xenbus_scanf(XBT_NIL, dev->otherend, str, + "%u:%u:%u:%u", &hst, &chn, &tgt, &lun); + if (XENBUS_EXIST_ERR(err)) + continue; + + /* front device state path */ + snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]); + + switch (op) { + case VSCSIFRONT_OP_ADD_LUN: + if (device_state == XenbusStateInitialised) { + sdev = scsi_device_lookup(info->host, chn, tgt, lun); + if (sdev) { + printk(KERN_ERR "scsifront: Device already in use.\n"); + scsi_device_put(sdev); + xenbus_printf(XBT_NIL, dev->nodename, + state_str, "%d", XenbusStateClosed); + } else { + scsi_add_device(info->host, chn, tgt, lun); + xenbus_printf(XBT_NIL, dev->nodename, + state_str, "%d", XenbusStateConnected); + } + } + break; + case VSCSIFRONT_OP_DEL_LUN: + if (device_state == XenbusStateClosing) { + sdev = scsi_device_lookup(info->host, chn, tgt, lun); + if (sdev) { + scsi_remove_device(sdev); + scsi_device_put(sdev); + xenbus_printf(XBT_NIL, dev->nodename, + state_str, "%d", XenbusStateClosed); + } + } + break; + default: + break; + } + } + + kfree(dir); + return; +} + + + + +static void scsifront_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + + DPRINTK("%p %u %u\n", dev, dev->state, backend_state); + + switch (backend_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateClosed: + break; + + case XenbusStateInitialised: + break; + + case XenbusStateConnected: + if (xenbus_read_driver_state(dev->nodename) == + XenbusStateInitialised) { + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); + } + + if (dev->state == XenbusStateConnected) + break; + + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosing: + scsifront_disconnect(info); + break; + + case XenbusStateReconfiguring: + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN); + xenbus_switch_state(dev, XenbusStateReconfiguring); + break; + + case XenbusStateReconfigured: + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); + xenbus_switch_state(dev, XenbusStateConnected); + break; + } +} + + +static struct xenbus_device_id scsifront_ids[] = { + { "vscsi" }, + { "" } +}; + + +static struct xenbus_driver scsifront_driver = { + .name = "vscsi", + .ids = scsifront_ids, + .probe = scsifront_probe, + .remove = scsifront_remove, +/* .resume = scsifront_resume, */ + .otherend_changed = scsifront_backend_changed, +}; + +int scsifront_xenbus_init(void) +{ + return xenbus_register_frontend(&scsifront_driver); +} + +void scsifront_xenbus_unregister(void) +{ + xenbus_unregister_driver(&scsifront_driver); +} + 
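+ +/* Editor's note, summarizing the handshake implemented above: scsifront_probe() + * publishes ring-ref and event-channel and switches the frontend to Initialised; + * once the backend reports Connected, scsifront_backend_changed() hotplugs the + * LUNs advertised under vscsi-devs and moves the frontend to Connected. + * Reconfiguring and Closing transitions drive LUN removal and disconnect. */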
--- linux-ec2-2.6.31.orig/drivers/xen/scsifront/Makefile +++ linux-ec2-2.6.31/drivers/xen/scsifront/Makefile @@ -0,0 +1,3 @@ + +obj-$(CONFIG_XEN_SCSI_FRONTEND) := xenscsi.o +xenscsi-objs := scsifront.o xenbus.o --- linux-ec2-2.6.31.orig/drivers/xen/scsifront/common.h +++ linux-ec2-2.6.31/drivers/xen/scsifront/common.h @@ -0,0 +1,136 @@ +/* + * Xen SCSI frontend driver + * + * Copyright (c) 2008, FUJITSU Limited + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __XEN_DRIVERS_SCSIFRONT_H__ +#define __XEN_DRIVERS_SCSIFRONT_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#define GRANT_INVALID_REF 0 +#define VSCSI_IN_ABORT 1 +#define VSCSI_IN_RESET 2 + +/* tuning point */ +#define VSCSIIF_DEFAULT_CMD_PER_LUN 10 +#define VSCSIIF_MAX_TARGET 64 +#define VSCSIIF_MAX_LUN 255 + +#define VSCSIIF_RING_SIZE \ + __RING_SIZE((struct vscsiif_sring *)0, PAGE_SIZE) +#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE + +struct vscsifrnt_shadow { + uint16_t next_free; + + /* command between backend and frontend + * VSCSIIF_ACT_SCSI_CDB or VSCSIIF_ACT_SCSI_RESET */ + unsigned char act; + + /* device reset handling */ + wait_queue_head_t wq_reset; /* reset work queue */ + int wait_reset; /* reset work queue condition */ + int32_t rslt_reset; /* reset response status */ + /* (SUCCESS or FAILED) */ + + /* for DMA_TO_DEVICE(1), DMA_FROM_DEVICE(2), DMA_NONE(3) + requests */ + unsigned int sc_data_direction; + + /* Number of pieces of scatter-gather */ + unsigned int nr_segments; + + /* the struct scsi_cmnd issued by the kernel is stored here */ + unsigned long req_scsi_cmnd; + int gref[VSCSIIF_SG_TABLESIZE]; +}; + +struct vscsifrnt_info { + struct xenbus_device *dev; + + struct Scsi_Host *host; + + spinlock_t io_lock; + spinlock_t shadow_lock; + unsigned int evtchn; + unsigned int irq; + + grant_ref_t ring_ref; + struct vscsiif_front_ring ring; + struct vscsiif_response ring_res; + + struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS]; + uint32_t shadow_free; + + struct task_struct *kthread; + wait_queue_head_t wq; + unsigned int waiting_resp; + +}; + +#define DPRINTK(_f, _a...)
\ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +int scsifront_xenbus_init(void); +void scsifront_xenbus_unregister(void); +int scsifront_schedule(void *data); +irqreturn_t scsifront_intr(int irq, void *dev_id); +int scsifront_cmd_done(struct vscsifrnt_info *info); + + +#endif /* __XEN_DRIVERS_SCSIFRONT_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_comms.c +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_comms.c @@ -35,24 +35,57 @@ #include #include #include +#if defined(CONFIG_XEN) || defined(MODULE) +#include +#include +#else #include #include #include +#endif + #include "xenbus_comms.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + static int xenbus_irq; +extern void xenbus_probe(struct work_struct *); static DECLARE_WORK(probe_work, xenbus_probe); static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); static irqreturn_t wake_waiting(int irq, void *unused) { - if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); + int old, new; + + old = atomic_read(&xenbus_xsd_state); + switch (old) { + case XENBUS_XSD_UNCOMMITTED: + BUG(); + return IRQ_HANDLED; + + case XENBUS_XSD_FOREIGN_INIT: + new = XENBUS_XSD_FOREIGN_READY; + break; + + case XENBUS_XSD_LOCAL_INIT: + new = XENBUS_XSD_LOCAL_READY; + break; + + case XENBUS_XSD_FOREIGN_READY: + case XENBUS_XSD_LOCAL_READY: + default: + goto wake; } + old = atomic_cmpxchg(&xenbus_xsd_state, old, new); + if (old != new) + schedule_work(&probe_work); + +wake: wake_up(&xb_waitq); return IRQ_HANDLED; } @@ -203,6 +236,7 @@ int xb_init_comms(void) { struct xenstore_domain_interface *intf = xen_store_interface; + int err; if (intf->req_prod != intf->req_cons) printk(KERN_ERR "XENBUS request ring is not quiescent " @@ -215,20 +249,33 @@ intf->rsp_cons = intf->rsp_prod; } +#if defined(CONFIG_XEN) || defined(MODULE) + if (xenbus_irq) + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + + err = bind_caller_port_to_irqhandler( + xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err <= 0) { + printk(KERN_ERR "XENBUS request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; +#else if (xenbus_irq) { /* Already have an irq; assume we're resuming */ rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); } else { - int err; err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); if (err <= 0) { printk(KERN_ERR "XENBUS request irq failed %i\n", err); return err; } - xenbus_irq = err; } +#endif return 0; } --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_probe_backend.c +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_probe_backend.c @@ -0,0 +1,285 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have (backend half). + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define DPRINTK(fmt, args...) \ + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env); +static int xenbus_probe_backend(const char *type, const char *domid); + +extern int read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "frontend-id", "frontend"); +} + +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ +static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type, *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return -EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return -EINVAL; + + devid = strrchr(nodename, '/') + 1; + + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = -ERANGE; + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) + err = -ENOENT; + kfree(frontend); + + if (err) + return err; + + if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s", + typelen, type, domid, devid) >= XEN_BUS_ID_SIZE) + return -ENOSPC; + return 0; +} + +static struct device_attribute xenbus_backend_attrs[] = { + __ATTR_NULL +}; + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type/<frontend>/<id> */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .error = -ENODEV, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, +// .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_backend, + .dev_attrs = xenbus_backend_attrs, + }, + .dev = { + 
.init_name = "xen-backend", + }, +}; + +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env) +{ + struct xenbus_device *xdev; + struct xenbus_driver *drv; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; + + /* stuff we want to pass to /sbin/hotplug */ + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); + + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); + + add_uevent_var(env, "XENBUS_BASE_PATH=%s", xenbus_backend.root); + + if (dev->driver) { + drv = to_xenbus_driver(dev->driver); + if (drv && drv->uevent) + return drv->uevent(xdev, env); + } + + return 0; +} + +int __xenbus_register_backend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend, + owner, mod_name); +} +EXPORT_SYMBOL_GPL(__xenbus_register_backend); + +/* backend/// */ +static int xenbus_probe_backend_unit(const char *dir, + const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(&xenbus_backend, type, nodename); + kfree(nodename); + return err; +} + +/* backend// */ +static int xenbus_probe_backend(const char *type, const char *domid) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid); + if (!nodename) + return -ENOMEM; + + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(nodename, type, dir[i]); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); +} + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; + +void xenbus_backend_suspend(int (*fn)(struct device *, void *)) +{ + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); +} + +void xenbus_backend_resume(int (*fn)(struct device *, void *)) +{ + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); +} + +void xenbus_backend_probe_and_watch(void) +{ + xenbus_probe_devices(&xenbus_backend); + register_xenbus_watch(&be_watch); +} + +void xenbus_backend_bus_register(void) +{ + xenbus_backend.error = bus_register(&xenbus_backend.bus); + if (xenbus_backend.error) + printk(KERN_WARNING + "XENBUS: Error registering backend bus: %i\n", + xenbus_backend.error); +} + +void xenbus_backend_device_register(void) +{ + if (xenbus_backend.error) + return; + + xenbus_backend.error = device_register(&xenbus_backend.dev); + if (xenbus_backend.error) { + bus_unregister(&xenbus_backend.bus); + printk(KERN_WARNING + "XENBUS: Error registering backend device: %i\n", + xenbus_backend.error); + } +} + +int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *)) +{ + return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn); +} +EXPORT_SYMBOL_GPL(xenbus_for_each_backend); --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_backend_client.c +++ 
linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_backend_client.c @@ -0,0 +1,147 @@ +/****************************************************************************** + * Backend-client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code in the backend + * driver. + * + * Copyright (C) 2005-2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +/* Based on Rusty Russell's skeleton driver's map_page */ +struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref) +{ + struct gnttab_map_grant_ref op; + struct vm_struct *area; + + area = alloc_vm_area(PAGE_SIZE); + if (!area) + return ERR_PTR(-ENOMEM); + + gnttab_set_map_op(&op, (unsigned long)area->addr, GNTMAP_host_map, + gnt_ref, dev->otherend_id); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + free_vm_area(area); + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + BUG_ON(!IS_ERR(ERR_PTR(op.status))); + return ERR_PTR(op.status); + } + + /* Stuff the handle in an unused field */ + area->phys_addr = (unsigned long)op.handle; + + return area; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + + +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, + grant_handle_t *handle, void *vaddr) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + gnt_ref, dev->otherend_id); + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + } else + *handle = op.handle; + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring); + + +/* Based on Rusty Russell's skeleton driver's unmap_page */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *area) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)area->addr, GNTMAP_host_map, + (grant_handle_t)area->phys_addr); + + if 
(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status == GNTST_okay) + free_vm_area(area); + else + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + (int16_t)area->phys_addr, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + + +int xenbus_unmap_ring(struct xenbus_device *dev, + grant_handle_t handle, void *vaddr) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, + handle); + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + handle, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring); + +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + int rc, val; + + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); + if (rc != 1) + val = 0; /* no online node present */ + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_comms.h +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_comms.h @@ -43,4 +43,20 @@ extern struct xenstore_domain_interface *xen_store_interface; extern int xen_store_evtchn; +/* For xenbus internal use. */ +enum { + XENBUS_XSD_UNCOMMITTED = 0, + XENBUS_XSD_FOREIGN_INIT, + XENBUS_XSD_FOREIGN_READY, + XENBUS_XSD_LOCAL_INIT, + XENBUS_XSD_LOCAL_READY, +}; +extern atomic_t xenbus_xsd_state; + +static inline int is_xenstored_ready(void) +{ + int s = atomic_read(&xenbus_xsd_state); + return s == XENBUS_XSD_FOREIGN_READY || s == XENBUS_XSD_LOCAL_READY; +} + #endif /* _XENBUS_COMMS_H */ --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_probe.c +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_probe.c @@ -4,6 +4,7 @@ * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard * Copyright (C) 2005, 2006 XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -32,9 +33,10 @@ #define DPRINTK(fmt, args...) \ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ - __func__, __LINE__, ##args) + __FUNCTION__, __LINE__, ##args) #include +#include #include #include #include @@ -42,27 +44,45 @@ #include #include #include -#include #include #include #include #include +#if defined(CONFIG_XEN) || defined(MODULE) +#include +#include +#include +#include +#include +#include +#ifdef MODULE +#include +#endif +#else #include #include #include #include +#endif #include "xenbus_comms.h" #include "xenbus_probe.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif int xen_store_evtchn; +#if !defined(CONFIG_XEN) && !defined(MODULE) EXPORT_SYMBOL(xen_store_evtchn); +#endif struct xenstore_domain_interface *xen_store_interface; static unsigned long xen_store_mfn; +extern struct mutex xenwatch_mutex; + static BLOCKING_NOTIFIER_HEAD(xenstore_chain); static void wait_for_devices(struct xenbus_driver *xendrv); @@ -71,8 +91,10 @@ static void xenbus_dev_shutdown(struct device *_dev); +#if !defined(CONFIG_XEN) && !defined(MODULE) static int xenbus_dev_suspend(struct device *dev, pm_message_t state); static int xenbus_dev_resume(struct device *dev); +#endif /* If something in array of ids matches this device, return it. 
*/ static const struct xenbus_device_id * @@ -95,16 +117,6 @@ return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; } -static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) -{ - struct xenbus_device *dev = to_xenbus_device(_dev); - - if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) - return -ENOMEM; - - return 0; -} - /* device/<type>/<id> => <type>-<id> */ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) { @@ -173,9 +185,33 @@ return read_otherend_details(xendev, "backend-id", "backend"); } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env) +{ + struct xenbus_device *xdev; + + if (dev == NULL) + return -ENODEV; + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; + + /* stuff we want to pass to /sbin/hotplug */ +#if defined(CONFIG_XEN) || defined(MODULE) + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); +#endif + add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype); + + return 0; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) static struct device_attribute xenbus_dev_attrs[] = { __ATTR_NULL }; +#endif /* Bus type for frontend drivers. */ static struct xen_bus_type xenbus_frontend = { @@ -183,18 +219,29 @@ .levels = 2, /* device/type/<id> */ .get_bus_id = frontend_bus_id, .probe = xenbus_probe_frontend, + .error = -ENODEV, .bus = { .name = "xen", .match = xenbus_match, - .uevent = xenbus_uevent, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_frontend, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) .dev_attrs = xenbus_dev_attrs, - +#endif +#if !defined(CONFIG_XEN) && !defined(MODULE) .suspend = xenbus_dev_suspend, .resume = xenbus_dev_resume, +#endif + }, +#if defined(CONFIG_XEN) || defined(MODULE) + .dev = { + .init_name = "xen", }, +#endif }; static void otherend_changed(struct xenbus_watch *watch, @@ -210,17 +257,17 @@ if (!dev->otherend || strncmp(dev->otherend, vec[XS_WATCH_PATH], strlen(dev->otherend))) { - dev_dbg(&dev->dev, "Ignoring watch at %s\n", - vec[XS_WATCH_PATH]); + dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]); return; } state = xenbus_read_driver_state(dev->otherend); - dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", + dev_dbg(&dev->dev, "state is %d (%s), %s, %s", state, xenbus_strstate(state), dev->otherend_watch.node, vec[XS_WATCH_PATH]); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) /* * Ignore xenbus transitions during shutdown. This prevents us doing * work that can fail e.g., when the rootfs is gone. 
@@ -234,6 +281,7 @@ xenbus_frontend_closed(dev); return; } +#endif if (drv->otherend_changed) drv->otherend_changed(dev, state); @@ -253,8 +301,13 @@ static int watch_otherend(struct xenbus_device *dev) { +#if defined(CONFIG_XEN) || defined(MODULE) + return xenbus_watch_path2(dev, dev->otherend, "state", + &dev->otherend_watch, otherend_changed); +#else return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, "%s/%s", dev->otherend, "state"); +#endif } @@ -280,8 +333,9 @@ err = talk_to_otherend(dev); if (err) { - dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n", - dev->nodename); + dev_warn(&dev->dev, + "xenbus_probe: talk_to_otherend on %s failed.\n", + dev->nodename); return err; } @@ -291,8 +345,9 @@ err = watch_otherend(dev); if (err) { - dev_warn(&dev->dev, "watch_otherend on %s failed.\n", - dev->nodename); + dev_warn(&dev->dev, + "xenbus_probe: watch_otherend on %s failed.\n", + dev->nodename); return err; } @@ -327,17 +382,25 @@ DPRINTK("%s", dev->nodename); +/* Commented out since xenstored stubdom is now minios based not linux based +#define XENSTORE_DOMAIN_SHARES_THIS_KERNEL +*/ +#ifndef XENSTORE_DOMAIN_SHARES_THIS_KERNEL + if (is_initial_xendomain()) +#endif + return; + get_device(&dev->dev); if (dev->state != XenbusStateConnected) { - printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__, - dev->nodename, xenbus_strstate(dev->state)); + dev_info(&dev->dev, "%s: %s: %s != Connected, skipping\n", __FUNCTION__, + dev->nodename, xenbus_strstate(dev->state)); goto out; } xenbus_switch_state(dev, XenbusStateClosing); timeout = wait_for_completion_timeout(&dev->down, timeout); if (!timeout) - printk(KERN_INFO "%s: %s timeout closing device\n", - __func__, dev->nodename); + dev_info(&dev->dev, "%s: %s timeout closing device\n", + __FUNCTION__, dev->nodename); out: put_device(&dev->dev); } @@ -347,12 +410,29 @@ struct module *owner, const char *mod_name) { + int ret; + + if (bus->error) + return bus->error; + drv->driver.name = drv->name; drv->driver.bus = &bus->bus; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) drv->driver.owner = owner; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21) drv->driver.mod_name = mod_name; +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + drv->driver.probe = xenbus_dev_probe; + drv->driver.remove = xenbus_dev_remove; + drv->driver.shutdown = xenbus_dev_shutdown; +#endif - return driver_register(&drv->driver); + mutex_lock(&xenwatch_mutex); + ret = driver_register(&drv->driver); + mutex_unlock(&xenwatch_mutex); + return ret; } int __xenbus_register_frontend(struct xenbus_driver *drv, @@ -450,21 +530,30 @@ } static ssize_t xendev_show_nodename(struct device *dev, - struct device_attribute *attr, char *buf) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) + struct device_attribute *attr, +#endif + char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); static ssize_t xendev_show_devtype(struct device *dev, - struct device_attribute *attr, char *buf) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) + struct device_attribute *attr, +#endif + char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); static ssize_t xendev_show_modalias(struct device *dev, - struct device_attribute *attr, char *buf) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) + struct device_attribute *attr, +#endif + char *buf) { return 
sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); } @@ -474,7 +563,6 @@ const char *type, const char *nodename) { - char devname[XEN_BUS_ID_SIZE]; int err; struct xenbus_device *xendev; size_t stringlen; @@ -482,6 +570,9 @@ enum xenbus_state state = xenbus_read_driver_state(nodename); + if (bus->error) + return bus->error; + if (state != XenbusStateInitialising) { /* Device is not new, so ignore it. This can happen if a device is going away after switching to Closed. */ @@ -506,15 +597,26 @@ xendev->devicetype = tmpstring; init_completion(&xendev->down); +#if defined(CONFIG_XEN) || defined(MODULE) + xendev->dev.parent = &bus->dev; +#endif xendev->dev.bus = &bus->bus; xendev->dev.release = xenbus_dev_release; - err = bus->get_bus_id(devname, xendev->nodename); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) + { + char devname[XEN_BUS_ID_SIZE]; + + err = bus->get_bus_id(devname, xendev->nodename); + if (!err) + dev_set_name(&xendev->dev, devname); + } +#else + err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); +#endif if (err) goto fail; - dev_set_name(&xendev->dev, devname); - /* Register with generic device framework. */ err = device_register(&xendev->dev); if (err) @@ -588,6 +690,9 @@ char **dir; unsigned int i, dir_n; + if (bus->error) + return bus->error; + dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n); if (IS_ERR(dir)) return PTR_ERR(dir); @@ -631,7 +736,7 @@ char type[XEN_BUS_ID_SIZE]; const char *p, *root; - if (char_count(node, '/') < 2) + if (bus->error || char_count(node, '/') < 2) return; exists = xenbus_exists(XBT_NIL, node, ""); @@ -660,7 +765,9 @@ kfree(root); } +#if !defined(CONFIG_XEN) && !defined(MODULE) EXPORT_SYMBOL_GPL(xenbus_dev_changed); +#endif static void frontend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) @@ -676,7 +783,11 @@ .callback = frontend_changed, }; +#if !defined(CONFIG_XEN) && !defined(MODULE) static int xenbus_dev_suspend(struct device *dev, pm_message_t state) +#else +static int suspend_dev(struct device *dev, void *data) +#endif { int err = 0; struct xenbus_driver *drv; @@ -689,14 +800,45 @@ drv = to_xenbus_driver(dev->driver); xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) +#if !defined(CONFIG_XEN) && !defined(MODULE) err = drv->suspend(xdev, state); +#else + err = drv->suspend(xdev); +#endif if (err) printk(KERN_WARNING "xenbus: suspend %s failed: %i\n", dev_name(dev), err); return 0; } +#if defined(CONFIG_XEN) || defined(MODULE) +static int suspend_cancel_dev(struct device *dev, void *data) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + xdev = container_of(dev, struct xenbus_device, dev); + if (drv->suspend_cancel) + err = drv->suspend_cancel(xdev); + if (err) + printk(KERN_WARNING + "xenbus: suspend_cancel %s failed: %i\n", + dev_name(dev), err); + return 0; +} +#endif + +#if !defined(CONFIG_XEN) && !defined(MODULE) static int xenbus_dev_resume(struct device *dev) +#else +static int resume_dev(struct device *dev, void *data) +#endif { int err; struct xenbus_driver *drv; @@ -741,15 +883,47 @@ return 0; } +#if defined(CONFIG_XEN) || defined(MODULE) +void xenbus_suspend(void) +{ + DPRINTK(""); + + if (!xenbus_frontend.error) + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); + xenbus_backend_suspend(suspend_dev); + xs_suspend(); +} +EXPORT_SYMBOL_GPL(xenbus_suspend); + +void xenbus_resume(void) +{ + xb_init_comms(); + 
xs_resume(); + if (!xenbus_frontend.error) + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); + xenbus_backend_resume(resume_dev); +} +EXPORT_SYMBOL_GPL(xenbus_resume); + +void xenbus_suspend_cancel(void) +{ + xs_suspend_cancel(); + if (!xenbus_frontend.error) + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); + xenbus_backend_resume(suspend_cancel_dev); +} +EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); +#endif + /* A flag to determine if xenstored is 'ready' (i.e. has started) */ -int xenstored_ready = 0; +atomic_t xenbus_xsd_state = ATOMIC_INIT(XENBUS_XSD_UNCOMMITTED); int register_xenstore_notifier(struct notifier_block *nb) { int ret = 0; - if (xenstored_ready > 0) + if (is_xenstored_ready()) ret = nb->notifier_call(nb, 0, NULL); else blocking_notifier_chain_register(&xenstore_chain, nb); @@ -764,9 +938,10 @@ } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); + void xenbus_probe(struct work_struct *unused) { - BUG_ON((xenstored_ready <= 0)); + BUG_ON(!is_xenstored_ready()); /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); @@ -777,49 +952,245 @@ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST) +static struct file_operations xsd_kva_fops; +static struct proc_dir_entry *xsd_kva_intf; +static struct proc_dir_entry *xsd_port_intf; + +static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + int old; + int rc; + + old = atomic_cmpxchg(&xenbus_xsd_state, + XENBUS_XSD_UNCOMMITTED, + XENBUS_XSD_LOCAL_INIT); + switch (old) { + case XENBUS_XSD_UNCOMMITTED: + rc = xb_init_comms(); + if (rc != 0) + return rc; + break; + + case XENBUS_XSD_FOREIGN_INIT: + case XENBUS_XSD_FOREIGN_READY: + return -EBUSY; + + case XENBUS_XSD_LOCAL_INIT: + case XENBUS_XSD_LOCAL_READY: + default: + break; + } + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static int xsd_kva_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "0x%p", xen_store_interface); + *eof = 1; + return len; +} + +static int xsd_port_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d", xen_store_evtchn); + *eof = 1; + return len; +} +#endif + +#if defined(CONFIG_XEN) || defined(MODULE) +static int xb_free_port(evtchn_port_t port) +{ + struct evtchn_close close; + close.port = port; + return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); +} + +int xenbus_conn(domid_t remote_dom, unsigned long *grant_ref, evtchn_port_t *local_port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int rc, rc2; + + BUG_ON(atomic_read(&xenbus_xsd_state) != XENBUS_XSD_FOREIGN_INIT); + BUG_ON(!is_initial_xendomain()); + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST) + remove_xen_proc_entry("xsd_kva"); + remove_xen_proc_entry("xsd_port"); +#endif + + rc = xb_free_port(xen_store_evtchn); + if (rc != 0) + goto fail0; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_dom; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (rc != 0) + goto fail0; + *local_port = xen_store_evtchn = alloc_unbound.port; + + /* keep the old page (xen_store_mfn, xen_store_interface) */ + rc = 
gnttab_grant_foreign_access(remote_dom, xen_store_mfn, + GTF_permit_access); + if (rc < 0) + goto fail1; + *grant_ref = rc; + + rc = xb_init_comms(); + if (rc != 0) + goto fail1; + + return 0; + +fail1: + rc2 = xb_free_port(xen_store_evtchn); + if (rc2 != 0) + printk(KERN_WARNING + "XENBUS: Error freeing xenstore event channel: %d\n", + rc2); +fail0: + xen_store_evtchn = -1; + return rc; +} +#endif + +#ifndef MODULE static int __init xenbus_probe_init(void) +#else +static int __devinit xenbus_probe_init(void) +#endif { int err = 0; +#if defined(CONFIG_XEN) || defined(MODULE) + unsigned long page = 0; +#endif DPRINTK(""); - err = -ENODEV; - if (!xen_domain()) - goto out_error; + if (!is_running_on_xen()) + return -ENODEV; /* Register ourselves with the kernel bus subsystem */ - err = bus_register(&xenbus_frontend.bus); - if (err) - goto out_error; - - err = xenbus_backend_bus_register(); - if (err) - goto out_unreg_front; + xenbus_frontend.error = bus_register(&xenbus_frontend.bus); + if (xenbus_frontend.error) + printk(KERN_WARNING + "XENBUS: Error registering frontend bus: %i\n", + xenbus_frontend.error); + xenbus_backend_bus_register(); /* * Domain0 doesn't have a store_evtchn or store_mfn yet. */ - if (xen_initial_domain()) { + if (is_initial_xendomain()) { +#if defined(CONFIG_XEN) || defined(MODULE) + struct evtchn_alloc_unbound alloc_unbound; + + /* Allocate page. */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto err; + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST) + /* And finally publish the above info in /proc/xen */ + xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600); + if (xsd_kva_intf) { + memcpy(&xsd_kva_fops, xsd_kva_intf->proc_fops, + sizeof(xsd_kva_fops)); + xsd_kva_fops.mmap = xsd_kva_mmap; + xsd_kva_intf->proc_fops = &xsd_kva_fops; + xsd_kva_intf->read_proc = xsd_kva_read; + } + xsd_port_intf = create_xen_proc_entry("xsd_port", 0400); + if (xsd_port_intf) + xsd_port_intf->read_proc = xsd_port_read; +#endif +#else /* dom0 not yet supported */ +#endif + xen_store_interface = mfn_to_virt(xen_store_mfn); } else { - xenstored_ready = 1; + atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY); +#ifndef MODULE xen_store_evtchn = xen_start_info->store_evtchn; xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); +#else + xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); + xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, + PAGE_SIZE); +#endif + /* Initialize the shared memory rings to talk to xenstored */ + err = xb_init_comms(); + if (err) + goto err; } - xen_store_interface = mfn_to_virt(xen_store_mfn); + +#if defined(CONFIG_XEN) || defined(MODULE) + xenbus_dev_init(); +#endif /* Initialize the interface to xenstore. 
*/ err = xs_init(); if (err) { printk(KERN_WARNING "XENBUS: Error initializing xenstore comms: %i\n", err); - goto out_unreg_back; + goto err; + } + +#if defined(CONFIG_XEN) || defined(MODULE) + /* Register ourselves with the kernel device subsystem */ + if (!xenbus_frontend.error) { + xenbus_frontend.error = device_register(&xenbus_frontend.dev); + if (xenbus_frontend.error) { + bus_unregister(&xenbus_frontend.bus); + printk(KERN_WARNING + "XENBUS: Error registering frontend device: %i\n", + xenbus_frontend.error); + } } +#endif + xenbus_backend_device_register(); - if (!xen_initial_domain()) + if (!is_initial_xendomain()) xenbus_probe(NULL); -#ifdef CONFIG_XEN_COMPAT_XENFS +#if defined(CONFIG_XEN_COMPAT_XENFS) && !defined(MODULE) /* * Create xenfs mountpoint in /proc for compatibility with * utilities that expect to find "xenbus" under "/proc/xen". @@ -829,21 +1200,36 @@ return 0; - out_unreg_back: - xenbus_backend_bus_unregister(); + err: +#if defined(CONFIG_XEN) || defined(MODULE) + if (page) + free_page(page); +#endif - out_unreg_front: - bus_unregister(&xenbus_frontend.bus); + /* + * Do not unregister the xenbus front/backend buses here. The buses + * must exist because front/backend drivers will use them when they are + * registered. + */ - out_error: return err; } +#ifndef MODULE postcore_initcall(xenbus_probe_init); - +#ifdef CONFIG_XEN +MODULE_LICENSE("Dual BSD/GPL"); +#else MODULE_LICENSE("GPL"); +#endif +#else +int __devinit xenbus_init(void) +{ + return xenbus_probe_init(); +} +#endif -static int is_disconnected_device(struct device *dev, void *data) +static int is_device_connecting(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); struct device_driver *drv = data; @@ -861,20 +1247,24 @@ return 0; xendrv = to_xenbus_driver(dev->driver); - return (xendev->state != XenbusStateConnected || - (xendrv->is_ready && !xendrv->is_ready(xendev))); + return (xendev->state < XenbusStateConnected || + (xendev->state == XenbusStateConnected && + xendrv->is_ready && !xendrv->is_ready(xendev))); } -static int exists_disconnected_device(struct device_driver *drv) +static int exists_connecting_device(struct device_driver *drv) { + if (xenbus_frontend.error) + return xenbus_frontend.error; return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, - is_disconnected_device); + is_device_connecting); } static int print_device_status(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); struct device_driver *drv = data; + struct xenbus_driver *xendrv; /* Is this operation limited to a particular driver? */ if (drv && (dev->driver != drv)) @@ -884,12 +1274,23 @@ /* Information only: is this too noisy? 
*/ printk(KERN_INFO "XENBUS: Device with no driver: %s\n", xendev->nodename); - } else if (xendev->state != XenbusStateConnected) { + return 0; + } + + if (xendev->state < XenbusStateConnected) { + enum xenbus_state rstate = XenbusStateUnknown; + if (xendev->otherend) + rstate = xenbus_read_driver_state(xendev->otherend); printk(KERN_WARNING "XENBUS: Timeout connecting " - "to device: %s (state %d)\n", - xendev->nodename, xendev->state); + "to device: %s (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); } + xendrv = to_xenbus_driver(dev->driver); + if (xendrv->is_ready && !xendrv->is_ready(xendev)) + printk(KERN_WARNING "XENBUS: Device not ready: %s\n", + xendev->nodename); + return 0; } @@ -897,7 +1298,7 @@ static int ready_to_wait_for_devices; /* - * On a 10 second timeout, wait for all devices currently configured. We need + * On a 5-minute timeout, wait for all devices currently configured. We need * to do this to guarantee that the filesystems and / or network devices * needed for boot are available, before we can allow the boot to proceed. * @@ -912,18 +1313,30 @@ */ static void wait_for_devices(struct xenbus_driver *xendrv) { - unsigned long timeout = jiffies + 10*HZ; + unsigned long start = jiffies; struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; - if (!ready_to_wait_for_devices || !xen_domain()) + if (!ready_to_wait_for_devices || !is_running_on_xen()) return; - while (exists_disconnected_device(drv)) { - if (time_after(jiffies, timeout)) - break; + while (exists_connecting_device(drv)) { + if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { + if (!seconds_waited) + printk(KERN_WARNING "XENBUS: Waiting for " + "devices to initialise: "); + seconds_waited += 5; + printk("%us...", 300 - seconds_waited); + if (seconds_waited == 300) + break; + } + schedule_timeout_interruptible(HZ/10); } + if (seconds_waited) + printk("\n"); + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, print_device_status); } @@ -931,10 +1344,18 @@ #ifndef MODULE static int __init boot_wait_for_devices(void) { - ready_to_wait_for_devices = 1; - wait_for_devices(NULL); + if (!xenbus_frontend.error) { + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + } return 0; } late_initcall(boot_wait_for_devices); #endif + +int xenbus_for_each_frontend(void *arg, int (*fn)(struct device *, void *)) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, arg, fn); +} +EXPORT_SYMBOL_GPL(xenbus_for_each_frontend); --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_client.c +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_client.c @@ -30,6 +30,12 @@ * IN THE SOFTWARE. */ +#if defined(CONFIG_XEN) || defined(MODULE) +#include +#include +#include +#include +#else #include #include #include @@ -37,8 +43,13 @@ #include #include #include +#endif #include +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + const char *xenbus_strstate(enum xenbus_state state) { static const char *const name[] = { @@ -49,6 +60,8 @@ [ XenbusStateConnected ] = "Connected", [ XenbusStateClosing ] = "Closing", [ XenbusStateClosed ] = "Closed", + [ XenbusStateReconfiguring ] = "Reconfiguring", + [ XenbusStateReconfigured ] = "Reconfigured", }; return (state < ARRAY_SIZE(name)) ? 
name[state] : "INVALID"; } @@ -91,6 +104,26 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); +#if defined(CONFIG_XEN) || defined(MODULE) +int xenbus_watch_path2(struct xenbus_device *dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + char *state = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", path, path2); + if (!state) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + err = xenbus_watch_path(dev, state, watch, callback); + + if (err) + kfree(state); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_path2); +#else /** * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path * @dev: xenbus device @@ -131,6 +164,7 @@ return err; } EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); +#endif /** @@ -200,13 +234,12 @@ } -static void xenbus_va_dev_error(struct xenbus_device *dev, int err, - const char *fmt, va_list ap) +static void _dev_error(struct xenbus_device *dev, int err, + const char *fmt, va_list ap) { int ret; unsigned int len; - char *printf_buffer = NULL; - char *path_buffer = NULL; + char *printf_buffer = NULL, *path_buffer = NULL; #define PRINTF_BUFFER_SIZE 4096 printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); @@ -223,14 +256,16 @@ path_buffer = error_path(dev); if (path_buffer == NULL) { - dev_err(&dev->dev, "failed to write error node for %s (%s)\n", - dev->nodename, printf_buffer); + dev_err(&dev->dev, + "xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); goto fail; } if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { - dev_err(&dev->dev, "failed to write error node for %s (%s)\n", - dev->nodename, printf_buffer); + dev_err(&dev->dev, + "xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); goto fail; } @@ -249,16 +284,18 @@ * Report the given negative errno into the store, along with the given * formatted message. */ -void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, + ...) { va_list ap; va_start(ap, fmt); - xenbus_va_dev_error(dev, err, fmt, ap); + _dev_error(dev, err, fmt, ap); va_end(ap); } EXPORT_SYMBOL_GPL(xenbus_dev_error); + /** * xenbus_dev_fatal * @dev: xenbus device @@ -269,24 +306,25 @@ * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly * closedown of this driver and its peer. */ - -void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, + ...) { va_list ap; va_start(ap, fmt); - xenbus_va_dev_error(dev, err, fmt, ap); + _dev_error(dev, err, fmt, ap); va_end(ap); xenbus_switch_state(dev, XenbusStateClosing); } EXPORT_SYMBOL_GPL(xenbus_dev_fatal); + /** * xenbus_grant_ring * @dev: xenbus device * @ring_mfn: mfn of ring to grant - + * * Grant access to the given @ring_mfn to the peer of the given device. Return * 0 on success, or -errno on error. On error, the device will switch to * XenbusStateClosing, and the error will be saved in the store. 
@@ -312,7 +350,7 @@ struct evtchn_alloc_unbound alloc_unbound; int err; - alloc_unbound.dom = DOMID_SELF; + alloc_unbound.dom = DOMID_SELF; alloc_unbound.remote_dom = dev->otherend_id; err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, @@ -327,6 +365,7 @@ EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */ /** * Bind to an existing interdomain event channel in another domain. Returns 0 * on success and stores the local port in *port. On error, returns -errno, @@ -352,6 +391,7 @@ return err; } EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); +#endif /** @@ -373,6 +413,7 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */ /** * xenbus_map_ring_valloc * @dev: xenbus device @@ -547,6 +588,7 @@ return op.status; } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); +#endif /** --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_probe.h +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_probe.h @@ -34,29 +34,46 @@ #ifndef _XENBUS_PROBE_H #define _XENBUS_PROBE_H +#ifndef BUS_ID_SIZE #define XEN_BUS_ID_SIZE 20 +#else +#define XEN_BUS_ID_SIZE BUS_ID_SIZE +#endif + +#ifdef CONFIG_PARAVIRT_XEN +#define is_running_on_xen() xen_domain() +#define is_initial_xendomain() xen_initial_domain() +#endif -#ifdef CONFIG_XEN_BACKEND +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) +#define dev_name(dev) ((dev)->bus_id) +#endif + +#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE) extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); extern void xenbus_backend_probe_and_watch(void); -extern int xenbus_backend_bus_register(void); -extern void xenbus_backend_bus_unregister(void); +extern void xenbus_backend_bus_register(void); +extern void xenbus_backend_device_register(void); #else static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} static inline void xenbus_backend_probe_and_watch(void) {} -static inline int xenbus_backend_bus_register(void) { return 0; } -static inline void xenbus_backend_bus_unregister(void) {} +static inline void xenbus_backend_bus_register(void) {} +static inline void xenbus_backend_device_register(void) {} #endif struct xen_bus_type { char *root; + int error; unsigned int levels; int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(const char *type, const char *dir); struct bus_type bus; +#if defined(CONFIG_XEN) || defined(MODULE) + struct device dev; +#endif }; extern int xenbus_match(struct device *_dev, struct device_driver *_drv); --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_dev.c +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_dev.c @@ -0,0 +1,460 @@ +/* + * xenbus_dev.c + * + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. 
+ * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xenbus_comms.h" + +#include +#include +#include +#include +#include + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#include + +struct xenbus_dev_transaction { + struct list_head list; + struct xenbus_transaction handle; +}; + +struct read_buffer { + struct list_head list; + unsigned int cons; + unsigned int len; + char msg[]; +}; + +struct xenbus_dev_data { + /* In-progress transaction. */ + struct list_head transactions; + + /* Active watches. */ + struct list_head watches; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[PAGE_SIZE]; + } u; + + /* Response queue. 
*/ + struct list_head read_buffers; + wait_queue_head_t read_waitq; + + struct mutex reply_mutex; +}; + +static struct proc_dir_entry *xenbus_dev_intf; + +static ssize_t xenbus_dev_read(struct file *filp, + char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_dev_data *u = filp->private_data; + struct read_buffer *rb; + int i, ret; + + if (!is_xenstored_ready()) + return -ENODEV; + + mutex_lock(&u->reply_mutex); + while (list_empty(&u->read_buffers)) { + mutex_unlock(&u->reply_mutex); + ret = wait_event_interruptible(u->read_waitq, + !list_empty(&u->read_buffers)); + if (ret) + return ret; + mutex_lock(&u->reply_mutex); + } + + rb = list_entry(u->read_buffers.next, struct read_buffer, list); + for (i = 0; i < len;) { + put_user(rb->msg[rb->cons], ubuf + i); + i++; + rb->cons++; + if (rb->cons == rb->len) { + list_del(&rb->list); + kfree(rb); + if (list_empty(&u->read_buffers)) + break; + rb = list_entry(u->read_buffers.next, + struct read_buffer, list); + } + } + mutex_unlock(&u->reply_mutex); + + return i; +} + +static void queue_reply(struct xenbus_dev_data *u, + char *data, unsigned int len) +{ + struct read_buffer *rb; + + if (len == 0) + return; + + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); + BUG_ON(rb == NULL); + + rb->cons = 0; + rb->len = len; + + memcpy(rb->msg, data, len); + + list_add_tail(&rb->list, &u->read_buffers); + + wake_up(&u->read_waitq); +} + +struct watch_adapter +{ + struct list_head list; + struct xenbus_watch watch; + struct xenbus_dev_data *dev_data; + char *token; +}; + +static void free_watch_adapter (struct watch_adapter *watch) +{ + kfree(watch->watch.node); + kfree(watch->token); + kfree(watch); +} + +static void watch_fired(struct xenbus_watch *watch, + const char **vec, + unsigned int len) +{ + struct watch_adapter *adap = + container_of(watch, struct watch_adapter, watch); + struct xsd_sockmsg hdr; + const char *path, *token; + int path_len, tok_len, body_len, data_len = 0; + + path = vec[XS_WATCH_PATH]; + token = adap->token; + + path_len = strlen(path) + 1; + tok_len = strlen(token) + 1; + if (len > 2) + data_len = vec[len] - vec[2] + 1; + body_len = path_len + tok_len + data_len; + + hdr.type = XS_WATCH_EVENT; + hdr.len = body_len; + + mutex_lock(&adap->dev_data->reply_mutex); + queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr)); + queue_reply(adap->dev_data, (char *)path, path_len); + queue_reply(adap->dev_data, (char *)token, tok_len); + if (len > 2) + queue_reply(adap->dev_data, (char *)vec[2], data_len); + mutex_unlock(&adap->dev_data->reply_mutex); +} + +static LIST_HEAD(watch_list); + +static ssize_t xenbus_dev_write(struct file *filp, + const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_dev_data *u = filp->private_data; + struct xenbus_dev_transaction *trans = NULL; + uint32_t msg_type; + void *reply; + char *path, *token; + struct watch_adapter *watch, *tmp_watch; + int err, rc = len; + + if (!is_xenstored_ready()) + return -ENODEV; + + if ((len + u->len) > sizeof(u->u.buffer)) { + rc = -EINVAL; + goto out; + } + + if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) { + rc = -EFAULT; + goto out; + } + + u->len += len; + if ((u->len < sizeof(u->u.msg)) || + (u->len < (sizeof(u->u.msg) + u->u.msg.len))) + return rc; + + msg_type = u->u.msg.type; + + switch (msg_type) { + case XS_WATCH: + case XS_UNWATCH: { + static const char *XS_RESP = "OK"; + struct xsd_sockmsg hdr; + + path = u->u.buffer + sizeof(u->u.msg); + token = memchr(path, 0, u->u.msg.len); + if (token == NULL) { + rc = -EILSEQ; + goto out; 
+ } + token++; + + if (msg_type == XS_WATCH) { + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + watch->watch.node = kmalloc(strlen(path)+1, + GFP_KERNEL); + strcpy((char *)watch->watch.node, path); + watch->watch.callback = watch_fired; + watch->token = kmalloc(strlen(token)+1, GFP_KERNEL); + strcpy(watch->token, token); + watch->dev_data = u; + + err = register_xenbus_watch(&watch->watch); + if (err) { + free_watch_adapter(watch); + rc = err; + goto out; + } + + list_add(&watch->list, &u->watches); + } else { + list_for_each_entry_safe(watch, tmp_watch, + &u->watches, list) { + if (!strcmp(watch->token, token) && + !strcmp(watch->watch.node, path)) + { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + break; + } + } + } + + hdr.type = msg_type; + hdr.len = strlen(XS_RESP) + 1; + mutex_lock(&u->reply_mutex); + queue_reply(u, (char *)&hdr, sizeof(hdr)); + queue_reply(u, (char *)XS_RESP, hdr.len); + mutex_unlock(&u->reply_mutex); + break; + } + + default: + if (msg_type == XS_TRANSACTION_START) { + trans = kmalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) { + rc = -ENOMEM; + goto out; + } + } + + reply = xenbus_dev_request_and_reply(&u->u.msg); + if (IS_ERR(reply)) { + kfree(trans); + rc = PTR_ERR(reply); + goto out; + } + + if (msg_type == XS_TRANSACTION_START) { + trans->handle.id = simple_strtoul(reply, NULL, 0); + list_add(&trans->list, &u->transactions); + } else if (msg_type == XS_TRANSACTION_END) { + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; + BUG_ON(&trans->list == &u->transactions); + list_del(&trans->list); + kfree(trans); + } + mutex_lock(&u->reply_mutex); + queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); + queue_reply(u, (char *)reply, u->u.msg.len); + mutex_unlock(&u->reply_mutex); + kfree(reply); + break; + } + + out: + u->len = 0; + return rc; +} + +static int xenbus_dev_open(struct inode *inode, struct file *filp) +{ + struct xenbus_dev_data *u; + + if (xen_store_evtchn == 0) + return -ENOENT; + + nonseekable_open(inode, filp); + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&u->transactions); + INIT_LIST_HEAD(&u->watches); + INIT_LIST_HEAD(&u->read_buffers); + init_waitqueue_head(&u->read_waitq); + + mutex_init(&u->reply_mutex); + + filp->private_data = u; + + return 0; +} + +static int xenbus_dev_release(struct inode *inode, struct file *filp) +{ + struct xenbus_dev_data *u = filp->private_data; + struct xenbus_dev_transaction *trans, *tmp; + struct watch_adapter *watch, *tmp_watch; + + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { + xenbus_transaction_end(trans->handle, 1); + list_del(&trans->list); + kfree(trans); + } + + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + } + + kfree(u); + + return 0; +} + +static unsigned int xenbus_dev_poll(struct file *file, poll_table *wait) +{ + struct xenbus_dev_data *u = file->private_data; + + if (!is_xenstored_ready()) + return -ENODEV; + + poll_wait(file, &u->read_waitq, wait); + if (!list_empty(&u->read_buffers)) + return POLLIN | POLLRDNORM; + return 0; +} + +#ifdef HAVE_UNLOCKED_IOCTL +static long xenbus_dev_ioctl(struct file *file, + unsigned int cmd, unsigned long data) +{ + extern int xenbus_conn(domid_t remote_dom, int *grant_ref, + evtchn_port_t *local_port); + void __user *udata = (void __user *) data; + int ret = -ENOTTY; + + if 
(!is_initial_xendomain()) + return -ENODEV; + + + switch (cmd) { + case IOCTL_XENBUS_ALLOC: { + xenbus_alloc_t xa; + int old; + + old = atomic_cmpxchg(&xenbus_xsd_state, + XENBUS_XSD_UNCOMMITTED, + XENBUS_XSD_FOREIGN_INIT); + if (old != XENBUS_XSD_UNCOMMITTED) + return -EBUSY; + + if (copy_from_user(&xa, udata, sizeof(xa))) { + ret = -EFAULT; + atomic_set(&xenbus_xsd_state, XENBUS_XSD_UNCOMMITTED); + break; + } + + ret = xenbus_conn(xa.dom, &xa.grant_ref, &xa.port); + if (ret != 0) { + atomic_set(&xenbus_xsd_state, XENBUS_XSD_UNCOMMITTED); + break; + } + + if (copy_to_user(udata, &xa, sizeof(xa))) { + ret = -EFAULT; + atomic_set(&xenbus_xsd_state, XENBUS_XSD_UNCOMMITTED); + break; + } + } + break; + + default: + break; + } + + return ret; +} +#endif + +static const struct file_operations xenbus_dev_file_ops = { + .read = xenbus_dev_read, + .write = xenbus_dev_write, + .open = xenbus_dev_open, + .release = xenbus_dev_release, + .poll = xenbus_dev_poll, +#ifdef HAVE_UNLOCKED_IOCTL + .unlocked_ioctl = xenbus_dev_ioctl +#endif +}; + +int xenbus_dev_init(void) +{ + xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400); + if (xenbus_dev_intf) + xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops; + + return 0; +} --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/Makefile +++ linux-ec2-2.6.31/drivers/xen/xenbus/Makefile @@ -1,7 +1,9 @@ -obj-y += xenbus.o +obj-y += xenbus_client.o xenbus_comms.o xenbus_xs.o xenbus_probe.o +obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o -xenbus-objs = -xenbus-objs += xenbus_client.o -xenbus-objs += xenbus_comms.o -xenbus-objs += xenbus_xs.o -xenbus-objs += xenbus_probe.o +xenbus_be-objs = +xenbus_be-objs += xenbus_backend_client.o + +xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o +obj-y += $(xenbus-y) $(xenbus-m) +obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o --- linux-ec2-2.6.31.orig/drivers/xen/xenbus/xenbus_xs.c +++ linux-ec2-2.6.31/drivers/xen/xenbus/xenbus_xs.c @@ -47,6 +47,14 @@ #include #include "xenbus_comms.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#ifndef PF_NOFREEZE /* Old kernel (pre-2.6.6). */ +#define PF_NOFREEZE 0 +#endif + struct xs_stored_msg { struct list_head list; @@ -108,7 +116,7 @@ * carrying out work. */ static pid_t xenwatch_pid; -static DEFINE_MUTEX(xenwatch_mutex); +/* static */ DEFINE_MUTEX(xenwatch_mutex); static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); static int get_error(const char *errorstring) @@ -177,14 +185,16 @@ mutex_unlock(&xs_state.request_mutex); - if ((msg->type == XS_TRANSACTION_END) || + if ((req_msg.type == XS_TRANSACTION_END) || ((req_msg.type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) up_read(&xs_state.transaction_mutex); return ret; } +#if !defined(CONFIG_XEN) && !defined(MODULE) EXPORT_SYMBOL(xenbus_dev_request_and_reply); +#endif /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ static void *xs_talkv(struct xenbus_transaction t, @@ -295,7 +305,7 @@ char *p, **ret; /* Count the strings. */ - *num = count_strings(strings, len); + *num = count_strings(strings, len) + 1; /* Transfer to one big alloc for easy freeing. 
*/ ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH); @@ -309,6 +319,7 @@ strings = (char *)&ret[*num]; for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) ret[(*num)++] = p; + ret[*num] = strings + len; return ret; } @@ -499,7 +510,7 @@ #define PRINTF_BUFFER_SIZE 4096 char *printf_buffer; - printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH); if (printf_buffer == NULL) return -ENOMEM; @@ -622,6 +633,10 @@ char token[sizeof(watch) * 2 + 1]; int err; +#if defined(CONFIG_XEN) || defined(MODULE) + BUG_ON(watch->flags & XBWF_new_thread); +#endif + sprintf(token, "%lX", (long)watch); down_read(&xs_state.watch_mutex); @@ -673,7 +688,9 @@ struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; +#if !defined(CONFIG_XEN) && !defined(MODULE) xb_init_comms(); +#endif mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); @@ -696,11 +713,32 @@ up_write(&xs_state.transaction_mutex); } +#if defined(CONFIG_XEN) || defined(MODULE) +static int xenwatch_handle_callback(void *data) +{ + struct xs_stored_msg *msg = data; + + msg->u.watch.handle->callback(msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + + kfree(msg->u.watch.vec); + kfree(msg); + + /* Kill this kthread if we were spawned just for this callback. */ + if (current->pid != xenwatch_pid) + do_exit(0); + + return 0; +} +#endif + static int xenwatch_thread(void *unused) { struct list_head *ent; struct xs_stored_msg *msg; + current->flags |= PF_NOFREEZE; for (;;) { wait_event_interruptible(watch_events_waitq, !list_empty(&watch_events)); @@ -716,17 +754,39 @@ list_del(ent); spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - msg = list_entry(ent, struct xs_stored_msg, list); - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - kfree(msg->u.watch.vec); - kfree(msg); + if (ent == &watch_events) { + mutex_unlock(&xenwatch_mutex); + continue; } + msg = list_entry(ent, struct xs_stored_msg, list); + +#if defined(CONFIG_XEN) || defined(MODULE) + /* + * Unlock the mutex before running an XBWF_new_thread + * handler. kthread_run can block which can deadlock + * against unregister_xenbus_watch() if we need to + * unregister other watches in order to make + * progress. This can occur on resume before the swap + * device is attached. 
+ */ + if (msg->u.watch.handle->flags & XBWF_new_thread) { + mutex_unlock(&xenwatch_mutex); + kthread_run(xenwatch_handle_callback, + msg, "xenwatch_cb"); + } else { + xenwatch_handle_callback(msg); + mutex_unlock(&xenwatch_mutex); + } +#else + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); mutex_unlock(&xenwatch_mutex); + kfree(msg->u.watch.vec); + kfree(msg); +#endif } return 0; @@ -820,6 +880,7 @@ { int err; + current->flags |= PF_NOFREEZE; for (;;) { err = process_msg(); if (err) @@ -834,7 +895,6 @@ int xs_init(void) { - int err; struct task_struct *task; INIT_LIST_HEAD(&xs_state.reply_list); @@ -846,11 +906,6 @@ init_rwsem(&xs_state.transaction_mutex); init_rwsem(&xs_state.watch_mutex); - /* Initialize the shared memory rings to talk to xenstored */ - err = xb_init_comms(); - if (err) - return err; - task = kthread_run(xenwatch_thread, NULL, "xenwatch"); if (IS_ERR(task)) return PTR_ERR(task); --- linux-ec2-2.6.31.orig/drivers/xen/blkback/vbd.c +++ linux-ec2-2.6.31/drivers/xen/blkback/vbd.c @@ -0,0 +1,122 @@ +/****************************************************************************** + * blkback/vbd.c + * + * Routines for managing virtual block devices (VBDs). + * + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) + +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly?VDISK_READONLY:0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_logical_block_size(vbd->bdev); +} + +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = open_by_devnum(vbd->pdevice, + vbd->readonly ? 
FMODE_READ : FMODE_WRITE); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_create: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_create: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev, + vbd->readonly ? FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; +} + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + if (vbd->bdev == NULL) + goto out; + + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} --- linux-ec2-2.6.31.orig/drivers/xen/blkback/interface.c +++ linux-ec2-2.6.31/drivers/xen/blkback/interface.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/interface.c + * + * Block-device interface management. + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ */ + +#include "common.h" +#include +#include + +static struct kmem_cache *blkif_cachep; + +blkif_t *blkif_alloc(domid_t domid) +{ + blkif_t *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(blkif_t *blkif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +{ + int err; + + /* Already connected through? */ + if (blkif->irq) + return 0; + + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkif_sring_t *sring; + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkif_x86_32_sring_t *sring_x86_32; + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkif_x86_64_sring_t *sring_x86_64; + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) + { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void blkif_disconnect(blkif_t *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void blkif_free(blkif_t *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL); +} --- linux-ec2-2.6.31.orig/drivers/xen/blkback/xenbus.c +++ linux-ec2-2.6.31/drivers/xen/blkback/xenbus.c @@ -0,0 +1,549 @@ +/* Xenbus 
code for blkif backend + Copyright (C) 2005 Rusty Russell + Copyright (C) 2005 XenSource Ltd + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include "common.h" +#include "../core/domctl.h" + +#undef DPRINTK +#define DPRINTK(fmt, args...) \ + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) + +static void connect(struct backend_info *); +static int connect_ring(struct backend_info *); +static void backend_changed(struct xenbus_watch *, const char **, + unsigned int); + +static int blkback_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + +static void update_blkif_status(blkif_t *blkif) +{ + int err; + char name[TASK_COMM_LEN]; + + /* Not ready to connect? */ + if (!blkif->irq || !blkif->vbd.bdev) + return; + + /* Already connected? */ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. */ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + } +} + + +/**************************************************************** + * sysfs interface for VBD I/O requests + */ + +#define VBD_SHOW(name, format, args...) 
\ + static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + struct backend_info *be = dev_get_drvdata(&dev->dev); \ + \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); + +static struct attribute *vbdstat_attrs[] = { + &dev_attr_oo_req.attr, + &dev_attr_rd_req.attr, + &dev_attr_wr_req.attr, + &dev_attr_br_req.attr, + &dev_attr_rd_sect.attr, + &dev_attr_wr_sect.attr, + NULL +}; + +static struct attribute_group vbdstat_group = { + .name = "statistics", + .attrs = vbdstat_attrs, +}; + +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); +VBD_SHOW(mode, "%s\n", be->mode); + +int xenvbd_sysfs_addif(struct xenbus_device *dev) +{ + int error; + + error = device_create_file(&dev->dev, &dev_attr_physical_device); + if (error) + goto fail1; + + error = device_create_file(&dev->dev, &dev_attr_mode); + if (error) + goto fail2; + + error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + if (error) + goto fail3; + + return 0; + +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail2: device_remove_file(&dev->dev, &dev_attr_mode); +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); + return error; +} + +void xenvbd_sysfs_delif(struct xenbus_device *dev) +{ + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + device_remove_file(&dev->dev, &dev_attr_mode); + device_remove_file(&dev->dev, &dev_attr_physical_device); +} + +static int blkback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + DPRINTK(""); + + if (be->major || be->minor) + xenvbd_sysfs_delif(dev); + + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + + if (be->backend_cdrom_watch.node) { + unregister_xenbus_watch(&be->backend_cdrom_watch); + kfree(be->backend_cdrom_watch.node); + be->backend_cdrom_watch.node = NULL; + } + + if (be->blkif) { + blkif_disconnect(be->blkif); + vbd_free(&be->blkif->vbd); + blkif_free(be->blkif); + be->blkif = NULL; + } + + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + return 0; +} + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures, and watch the store waiting for the hotplug scripts to tell us + * the device's physical major and minor numbers. Switch to InitWait. 
+ */ +static int blkback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + be->dev = dev; + dev_set_drvdata(&dev->dev, be); + + be->blkif = blkif_alloc(dev->otherend_id); + if (IS_ERR(be->blkif)) { + err = PTR_ERR(be->blkif); + be->blkif = NULL; + xenbus_dev_fatal(dev, err, "creating block interface"); + goto fail; + } + + /* setup back pointer */ + be->blkif->be = be; + + err = xenbus_watch_path2(dev, dev->nodename, "physical-device", + &be->backend_watch, backend_changed); + if (err) + goto fail; + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; + + return 0; + +fail: + DPRINTK("failed"); + blkback_remove(dev); + return err; +} + + +/** + * Callback received when the hotplug scripts have placed the physical-device + * node. Read it and the mode node, and create a vbd. If the frontend is + * ready, connect. + */ +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + unsigned major; + unsigned minor; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + int cdrom = 0; + char *device_type; + + DPRINTK(""); + + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", + &major, &minor); + if (XENBUS_EXIST_ERR(err)) { + /* Since this watch will fire once immediately after it is + registered, we expect this. Ignore it, and wait for the + hotplug scripts. */ + return; + } + if (err != 2) { + xenbus_dev_fatal(dev, err, "reading physical-device"); + return; + } + + if ((be->major || be->minor) && + ((be->major != major) || (be->minor != minor))) { + printk(KERN_WARNING + "blkback: changing physical device (from %x:%x to " + "%x:%x) not supported.\n", be->major, be->minor, + major, minor); + return; + } + + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); + if (IS_ERR(be->mode)) { + err = PTR_ERR(be->mode); + be->mode = NULL; + xenbus_dev_fatal(dev, err, "reading mode"); + return; + } + + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); + if (!IS_ERR(device_type)) { + cdrom = strcmp(device_type, "cdrom") == 0; + kfree(device_type); + } + + if (be->major == 0 && be->minor == 0) { + /* Front end dir is a number, which is used as the handle. */ + + char *p = strrchr(dev->otherend, '/') + 1; + long handle = simple_strtoul(p, NULL, 0); + + be->major = major; + be->minor = minor; + + err = vbd_create(be->blkif, handle, major, minor, + (NULL == strchr(be->mode, 'w')), cdrom); + if (err) { + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating vbd structure"); + return; + } + + err = xenvbd_sysfs_addif(dev); + if (err) { + vbd_free(&be->blkif->vbd); + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating sysfs entries"); + return; + } + + /* We're potentially connected now */ + update_blkif_status(be->blkif); + + /* Add watch for cdrom media status if necessary */ + cdrom_add_media_watch(be); + } +} + + +/** + * Callback received when the frontend's state changes.
+ */ +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + int err; + + DPRINTK("%s", xenbus_strstate(frontend_state)); + + switch (frontend_state) { + case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk(KERN_INFO "%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + /* Ensure we connect even when two watches fire in + close succession and we miss the intermediate value + of frontend_state. */ + if (dev->state == XenbusStateConnected) + break; + + err = connect_ring(be); + if (err) + break; + update_blkif_status(be->blkif); + break; + + case XenbusStateClosing: + blkif_disconnect(be->blkif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/* ** Connection ** */ + + +/** + * Write the physical details regarding the block device to the store, and + * switch to Connected state. + */ +static void connect(struct backend_info *be) +{ + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = be->dev; + + DPRINTK("%s", dev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return; + } + + err = blkback_barrier(xbt, be, 1); + if (err) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sectors", + dev->nodename); + goto abort; + } + + /* FIXME: use a typename instead */ + err = xenbus_printf(xbt, dev->nodename, "info", "%u", + vbd_info(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/info", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", + vbd_secsize(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sector-size", + dev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(dev, err, "ending transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "%s: switching to Connected state", + dev->nodename); + + return; + abort: + xenbus_transaction_end(xbt, 1); +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + char protocol[64] = ""; + int err; + + DPRINTK("%s", dev->otherend); + + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); + if (err) { + strcpy(protocol, "unspecified"); + be->blkif->blk_protocol = xen_guest_blkif_protocol(be->blkif->domid); + } + else if (0 == strcmp(protocol,
XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; +#if 1 /* maintain compatibility with early sles10-sp1 and paravirt netware betas */ + else if (0 == strcmp(protocol, "1")) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, "2")) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; +#endif + else { + xenbus_dev_fatal(dev, -EINVAL, "unknown frontend protocol %s", protocol); + return -EINVAL; + } + printk(KERN_INFO + "blkback: ring-ref %lu, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); + + /* Map the shared frame, irq etc. */ + err = blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + return err; + } + + return 0; +} + + +/* ** Driver Registration ** */ + + +static const struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + + +static struct xenbus_driver blkback = { + .name = "vbd", + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .otherend_changed = frontend_changed +}; + + +void blkif_xenbus_init(void) +{ + if (xenbus_register_backend(&blkback)) + BUG(); +} --- linux-ec2-2.6.31.orig/drivers/xen/blkback/blkback-pagemap.h +++ linux-ec2-2.6.31/drivers/xen/blkback/blkback-pagemap.h @@ -0,0 +1,37 @@ +#ifndef _BLKBACK_PAGEMAP_H_ +#define _BLKBACK_PAGEMAP_H_ + +#include +#include +#include + +typedef unsigned int busid_t; + +struct blkback_pagemap { + domid_t domid; + busid_t busid; + grant_ref_t gref; +}; + +#if defined(CONFIG_XEN_BLKBACK_PAGEMAP) || defined(CONFIG_XEN_BLKBACK_PAGEMAP_MODULE) + +int blkback_pagemap_init(int); +void blkback_pagemap_set(int, struct page *, domid_t, busid_t, grant_ref_t); +void blkback_pagemap_clear(struct page *); +struct blkback_pagemap blkback_pagemap_read(struct page *); + +#else /* CONFIG_XEN_BLKBACK_PAGEMAP */ + +static inline int blkback_pagemap_init(int pages) { return 0; } +static inline void blkback_pagemap_set(int idx, struct page *page, domid_t dom, + busid_t bus, grant_ref_t gnt) {} +static inline void blkback_pagemap_clear(struct page *page) {} +static inline struct blkback_pagemap blkback_pagemap_read(struct page *page) +{ + BUG(); + return (struct blkback_pagemap){-1, -1, -1}; +} + +#endif /* CONFIG_XEN_BLKBACK_PAGEMAP */ + +#endif --- linux-ec2-2.6.31.orig/drivers/xen/blkback/cdrom.c +++ linux-ec2-2.6.31/drivers/xen/blkback/cdrom.c @@ -0,0 +1,162 @@ +/****************************************************************************** + * blkback/cdrom.c + * + * Routines for managing cdrom watch and media-present attribute of a + * cdrom type virtual block device (VBD).
+ * + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand + * Copyright (c) 2007 Pat Campbell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#undef DPRINTK +#define DPRINTK(_f, _a...) \ + printk("(%s() file=%s, line=%d) " _f "\n", \ + __PRETTY_FUNCTION__, __FILE__ , __LINE__ , ##_a ) + + +#define MEDIA_PRESENT "media-present" + +static void cdrom_media_changed(struct xenbus_watch *, const char **, unsigned int); + +/** + * Writes media-present=1 attribute for the given vbd device if not + * already there + */ +static int cdrom_xenstore_write_media_present(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + struct xenbus_transaction xbt; + int err; + int media_present; + + err = xenbus_scanf(XBT_NIL, dev->nodename, MEDIA_PRESENT, "%d", + &media_present); + if (0 < err) { + DPRINTK("already written err%d", err); + return(0); + } + media_present = 1; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return(-1); + } + + err = xenbus_printf(xbt, dev->nodename, MEDIA_PRESENT, "%d", media_present ); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/%s", + dev->nodename, MEDIA_PRESENT); + goto abort; + } + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(dev, err, "ending transaction"); + return 0; + abort: + xenbus_transaction_end(xbt, 1); + return -1; +} + +/** + * + */ +static int cdrom_is_type(struct backend_info *be) +{ + DPRINTK("type:%x", be->blkif->vbd.type ); + return (be->blkif->vbd.type & VDISK_CDROM) + && (be->blkif->vbd.type & GENHD_FL_REMOVABLE); +} + +/** + * + */ +void cdrom_add_media_watch(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + int err; + + DPRINTK("nodename:%s", dev->nodename); + if (cdrom_is_type(be)) { + DPRINTK("is a cdrom"); + if ( cdrom_xenstore_write_media_present(be) == 0 ) { + DPRINTK( "xenstore wrote OK"); + err = xenbus_watch_path2(dev, dev->nodename, MEDIA_PRESENT, + &be->backend_cdrom_watch, + cdrom_media_changed); + if (err) + DPRINTK( "media_present watch add failed" ); + } + } +} + +/** + * Callback received when the "media_present" 
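For reference, the watch mechanism used by cdrom_add_media_watch() pairs a registration call with a callback that recovers its containing object via container_of(). A sketch under the same structures (the callback body is illustrative, not the driver's exact logic):

    /* Registered with xenbus_watch_path2(); fires whenever the
     * <nodename>/media-present node changes in xenstore. */
    static void media_watch_cb(struct xenbus_watch *watch,
    			   const char **vec, unsigned int len)
    {
    	struct backend_info *be
    		= container_of(watch, struct backend_info,
    			       backend_cdrom_watch);
    	int present;

    	/* Re-read the node; a watch event carries no value. */
    	if (xenbus_scanf(XBT_NIL, be->dev->nodename, "media-present",
    			 "%d", &present) != 1)
    		return;
    	/* ... react to the new value ... */
    }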
xenstore node is changed + */ +static void cdrom_media_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + unsigned media_present; + struct backend_info *be + = container_of(watch, struct backend_info, backend_cdrom_watch); + struct xenbus_device *dev = be->dev; + + if (!cdrom_is_type(be)) { + DPRINTK("callback not for a cdrom" ); + return; + } + + err = xenbus_scanf(XBT_NIL, dev->nodename, MEDIA_PRESENT, "%d", + &media_present); + if (err == 0 || err == -ENOENT) { + DPRINTK("xenbus_read of cdrom media_present node error:%d",err); + return; + } + + if (media_present == 0) + vbd_free(&be->blkif->vbd); + else { + char *p = strrchr(dev->otherend, '/') + 1; + long handle = simple_strtoul(p, NULL, 0); + + if (!be->blkif->vbd.bdev) { + err = vbd_create(be->blkif, handle, be->major, be->minor, + !strchr(be->mode, 'w'), 1); + if (err) { + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating vbd structure"); + return; + } + } + } +} --- linux-ec2-2.6.31.orig/drivers/xen/blkback/blkback.c +++ linux-ec2-2.6.31/drivers/xen/blkback/blkback.c @@ -0,0 +1,673 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/main.c + * + * Back-end of the driver for virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. A + * reference front-end implementation can be found in: + * arch/xen/drivers/blkif/frontend + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. + * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. 
+ * 64 should be enough to keep us competitive with Linux. + */ +static int blkif_reqs = 64; +module_param_named(reqs, blkif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); + +/* Run-time switchable: /sys/module/blkback/parameters/ */ +static unsigned int log_stats = 0; +static unsigned int debug_lvl = 0; +module_param(log_stats, int, 0644); +module_param(debug_lvl, int, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +typedef struct { + blkif_t *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; +} pending_req_t; + +static pending_req_t *pending_reqs; +static struct list_head pending_free; +static DEFINE_SPINLOCK(pending_free_lock); +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); + +#define BLKBACK_INVALID_HANDLE (~0) + +static struct page **pending_pages; +static grant_handle_t *pending_grant_handles; + +static inline int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +static inline unsigned long vaddr(pending_req_t *req, int seg) +{ + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + + +static int do_block_io_op(blkif_t *blkif); +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req); +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st); + +/****************************************************************** + * misc small helpers + */ +static pending_req_t* alloc_req(void) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&pending_free_lock, flags); + return req; +} + +static void free_req(pending_req_t *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + spin_unlock_irqrestore(&pending_free_lock, flags); + if (was_empty) + wake_up(&pending_free_wq); +} + +static void unplug_queue(blkif_t *blkif) +{ + if (blkif->plug == NULL) + return; + if (blkif->plug->unplug_fn) + blkif->plug->unplug_fn(blkif->plug); + kobject_put(&blkif->plug->kobj); + blkif->plug = NULL; +} + +static void plug_queue(blkif_t *blkif, struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q == blkif->plug) + return; + unplug_queue(blkif); + WARN_ON(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)); + kobject_get(&q->kobj); + blkif->plug = q; +} + +static void fast_flush_area(pending_req_t *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + blkback_pagemap_clear(virt_to_page(vaddr(req, i))); + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + 
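A note on fast_flush_area(), whose body begins here: grant unmaps are batched. Every still-mapped segment contributes one entry to unmap[], slots that were never mapped (handle == BLKBACK_INVALID_HANDLE) are skipped, and the whole batch is retired with a single GNTTABOP_unmap_grant_ref hypercall, so the cost is one hypervisor transition per request rather than one per page.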
GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); +} + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static void print_stats(blkif_t *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d | pk %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req, + blkif->st_pk_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; + blkif->st_pk_req = 0; +} + +int blkif_schedule(void *arg) +{ + blkif_t *blkif = arg; + + blkif_get(blkif); + + if (debug_lvl) + printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + unplug_queue(blkif); + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +/****************************************************************** + * COMPLETION CALLBACK -- Called as bh->b_end_io() + */ + +static void __end_block_io_op(pending_req_t *pending_req, int error) +{ + /* An error fails the entire request. */ + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + DPRINTK("blkback: write barrier op failed, not supported\n"); + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if (error) { + DPRINTK("Buffer not up-to-date at end of operation, " + "error=%d\n", error); + pending_req->status = BLKIF_RSP_ERROR; + } + + if (atomic_dec_and_test(&pending_req->pendcnt)) { + fast_flush_area(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } +} + +static void end_block_io_op(struct bio *bio, int error) +{ + __end_block_io_op(bio->bi_private, error); + bio_put(bio); +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ + +static void blkif_notify_work(blkif_t *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + +irqreturn_t blkif_be_int(int irq, void *dev_id) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ + +static int do_block_io_op(blkif_t *blkif) +{ + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + blkif_request_t req; + pending_req_t *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + + rc = blk_rings->common.req_cons; + rp = blk_rings->common.sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
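The loop that follows is the classic shared-ring consumer: the producer index is sampled once, the read barrier orders that sample before the request reads, and the private consumer index advances toward it. Stripped to its shape (consume() is a hypothetical stand-in for the per-request dispatch below):

    rc = blk_rings->common.req_cons;	/* private consumer index */
    rp = blk_rings->common.sring->req_prod;	/* guest-written producer */
    rmb();				/* order: rp before ring entries */
    while (rc != rp) {
    	consume(RING_GET_REQUEST(&blk_rings->native, rc));
    	blk_rings->common.req_cons = ++rc;
    }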
*/ + + while (rc != rp) { + + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + + if (kthread_should_stop()) { + more_to_do = 1; + break; + } + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); + break; + default: + BUG(); + } + blk_rings->common.req_cons = ++rc; /* before make_response() */ + + /* Apply all sanity checks to /private copy/ of request. */ + barrier(); + + switch (req.operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + case BLKIF_OP_WRITE_BARRIER: + blkif->st_br_req++; + /* fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + case BLKIF_OP_PACKET: + DPRINTK("error: block operation BLKIF_OP_PACKET not implemented\n"); + blkif->st_pk_req++; + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + default: + /* A good sign something is wrong: sleep for a while to + * avoid excessive CPU consumption by a bad guest. */ + msleep(1); + DPRINTK("error: unknown block io operation [%d]\n", + req.operation); + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + + /* Yield point for this unbounded loop. */ + cond_resched(); + } + + return more_to_do; +} + +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req) +{ + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct phys_req preq; + struct { + unsigned long buf; unsigned int nsec; + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg; + struct bio *bio = NULL; + int ret, i; + int operation; + + switch (req->operation) { + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } + + /* Check that number of segments is sane. 
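Note the copy-then-check pattern above: the request is memcpy'd out of the shared ring into a private copy, barrier() prevents the compiler from re-reading ring memory, and every subsequent check (including the segment-count check this comment introduces) runs on the copy. A malicious guest therefore cannot change a field between validation and use.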
*/ + nseg = req->nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTK("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + preq.dev = req->handle; + preq.sector_number = req->sector_number; + preq.nr_sects = 0; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + + for (i = 0; i < nseg; i++) { + uint32_t flags; + + seg[i].nsec = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->seg[i].last_sect < req->seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + + flags = GNTMAP_host_map; + if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->seg[i].gref, blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + } else { + blkback_pagemap_set(vaddr_pagenr(pending_req, i), + virt_to_page(vaddr(pending_req, i)), + blkif->domid, req->handle, + req->seg[i].gref); + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + set_phys_to_machine(__pa(vaddr( + pending_req, i)) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); + seg[i].buf = map[i].dev_bus_addr | + (req->seg[i].first_sect << 9); + } + + if (ret) + goto fail_flush; + + if (vbd_translate(&preq, blkif, operation) != 0) { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? 
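One easily missed detail in the mapping loop above: GNTMAP_readonly is set for everything except READ, because a guest WRITE (or WRITE_BARRIER) only requires the backend to read the guest's pages on their way to disk; only a guest READ needs the grant mapped writable so the device data can be copied in.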
"read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); + goto fail_flush; + } + + plug_queue(blkif, preq.bdev); + atomic_set(&pending_req->pendcnt, 1); + blkif_get(blkif); + + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_put_bio; + } + + while ((bio == NULL) || + (bio_add_page(bio, + virt_to_page(vaddr(pending_req, i)), + seg[i].nsec << 9, + seg[i].buf & ~PAGE_MASK) == 0)) { + if (bio) { + atomic_inc(&pending_req->pendcnt); + submit_bio(operation, bio); + } + + bio = bio_alloc(GFP_KERNEL, nseg-i); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = preq.sector_number; + } + + preq.sector_number += seg[i].nsec; + } + + if (!bio) { + BUG_ON(operation != WRITE_BARRIER); + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = -1; + } + + submit_bio(operation, bio); + + if (operation == READ) + blkif->st_rd_sect += preq.nr_sects; + else if (operation == WRITE || operation == WRITE_BARRIER) + blkif->st_wr_sect += preq.nr_sects; + + return; + + fail_flush: + fast_flush_area(pending_req); + fail_response: + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); + msleep(1); /* back off a bit */ + return; + + fail_put_bio: + __end_block_io_op(pending_req, -EINVAL); + if (bio) + bio_put(bio); + unplug_queue(blkif); + msleep(1); /* back off a bit */ + return; +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + + +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st) +{ + blkif_response_t resp; + unsigned long flags; + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + int more_to_do = 0; + int notify; + + resp.id = id; + resp.operation = op; + resp.status = st; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_32: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_64: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + default: + BUG(); + } + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static int __init blkif_init(void) +{ + int i, mmap_pages; + + if (!is_running_on_xen()) + return -ENODEV; + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + blkif_reqs, GFP_KERNEL); + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + + if (blkback_pagemap_init(mmap_pages)) + goto out_of_memory; + + if (!pending_reqs || !pending_grant_handles || !pending_pages) + goto out_of_memory; + + for (i = 0; i < mmap_pages; i++) + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + + blkif_interface_init(); + + memset(pending_reqs, 0, sizeof(pending_reqs)); + INIT_LIST_HEAD(&pending_free); + + for (i = 0; i < blkif_reqs; i++) + list_add_tail(&pending_reqs[i].free_list, &pending_free); + + blkif_xenbus_init(); + + return 0; + + out_of_memory: + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, mmap_pages); + printk("%s: out of memory\n", __FUNCTION__); + return -ENOMEM; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/blkback/Makefile +++ linux-ec2-2.6.31/drivers/xen/blkback/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o +obj-$(CONFIG_XEN_BLKBACK_PAGEMAP) += blkback-pagemap.o + +blkbk-y := blkback.o xenbus.o interface.o vbd.o cdrom.o --- linux-ec2-2.6.31.orig/drivers/xen/blkback/blkback-pagemap.c +++ linux-ec2-2.6.31/drivers/xen/blkback/blkback-pagemap.c @@ -0,0 +1,96 @@ +#include +#include "blkback-pagemap.h" + +static int blkback_pagemap_size; +static struct blkback_pagemap *blkback_pagemap; + +static inline int +blkback_pagemap_entry_clear(struct blkback_pagemap *map) +{ + static struct blkback_pagemap zero; + return !memcmp(map, &zero, sizeof(zero)); +} + +int +blkback_pagemap_init(int pages) +{ + blkback_pagemap = kzalloc(pages * sizeof(struct blkback_pagemap), + GFP_KERNEL); + if (!blkback_pagemap) + return -ENOMEM; + + blkback_pagemap_size = pages; + return 0; +} +EXPORT_SYMBOL_GPL(blkback_pagemap_init); + +void +blkback_pagemap_set(int idx, struct page *page, + domid_t domid, busid_t busid, grant_ref_t gref) +{ + struct blkback_pagemap *entry; + + BUG_ON(!blkback_pagemap); + BUG_ON(idx >= blkback_pagemap_size); + + SetPageBlkback(page); + set_page_private(page, idx); + + entry = blkback_pagemap + idx; + if (!blkback_pagemap_entry_clear(entry)) { + printk("overwriting pagemap %d: d %u b %u g %u\n", + idx, entry->domid, entry->busid, entry->gref); + BUG(); + } + + entry->domid = domid; + entry->busid = busid; + entry->gref = gref; +} +EXPORT_SYMBOL_GPL(blkback_pagemap_set); + +void +blkback_pagemap_clear(struct page *page) +{ + int idx; + struct blkback_pagemap *entry; + + idx = (int)page_private(page); + + BUG_ON(!blkback_pagemap); + BUG_ON(!PageBlkback(page)); + BUG_ON(idx >= blkback_pagemap_size); + + entry = blkback_pagemap + idx; + if (blkback_pagemap_entry_clear(entry)) { + printk("clearing empty pagemap %d\n", idx); + BUG(); + } + + memset(entry, 0, sizeof(*entry)); +} +EXPORT_SYMBOL_GPL(blkback_pagemap_clear); + +struct blkback_pagemap +blkback_pagemap_read(struct page *page) 
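blkback-pagemap.c relies on two small idioms worth naming: a free slot is simply the all-zero struct, detected by memcmp against a static (hence zero-initialized) instance, and the page-to-slot link is the page's private field plus the PG_blkback flag. The emptiness test, generalized as a sketch:

    static inline int entry_is_free(const struct blkback_pagemap *e)
    {
    	static const struct blkback_pagemap zero;	/* all zeroes */
    	return memcmp(e, &zero, sizeof(zero)) == 0;
    }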
+{ + int idx; + struct blkback_pagemap *entry; + + idx = (int)page_private(page); + + BUG_ON(!blkback_pagemap); + BUG_ON(!PageBlkback(page)); + BUG_ON(idx >= blkback_pagemap_size); + + entry = blkback_pagemap + idx; + if (blkback_pagemap_entry_clear(entry)) { + printk("reading empty pagemap %d\n", idx); + BUG(); + } + + return *entry; +} +EXPORT_SYMBOL(blkback_pagemap_read); + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/blkback/common.h +++ linux-ec2-2.6.31/drivers/xen/blkback/common.h @@ -0,0 +1,156 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "blkback-pagemap.h" + + +#define DPRINTK(_f, _a...) \ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + u32 pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; +}; + +struct backend_info; + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int irq; + /* Comms information. */ + enum blkif_protocol blk_protocol; + blkif_back_rings_t blk_rings; + struct vm_struct *blk_ring_area; + /* The VBD attached to this interface. */ + struct vbd vbd; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. 
*/ + spinlock_t blk_ring_lock; + atomic_t refcnt; + + wait_queue_head_t wq; + struct task_struct *xenblkd; + unsigned int waiting_reqs; + struct request_queue *plug; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_br_req; + int st_pk_req; + int st_rd_sect; + int st_wr_sect; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; +} blkif_t; + +struct backend_info +{ + struct xenbus_device *dev; + blkif_t *blkif; + struct xenbus_watch backend_watch; + struct xenbus_watch backend_cdrom_watch; + unsigned major; + unsigned minor; + char *mode; +}; + +blkif_t *blkif_alloc(domid_t domid); +void blkif_disconnect(blkif_t *blkif); +void blkif_free(blkif_t *blkif); +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + +/* Create a vbd. */ +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, + unsigned minor, int readonly, int cdrom); +void vbd_free(struct vbd *vbd); + +unsigned long long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); + +void blkif_interface_init(void); + +void blkif_xenbus_init(void); + +irqreturn_t blkif_be_int(int irq, void *dev_id); +int blkif_schedule(void *arg); + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); + +/* cdrom media change */ +void cdrom_add_media_watch(struct backend_info *be); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/xenoprof/xenoprofile.c +++ linux-ec2-2.6.31/drivers/xen/xenoprof/xenoprofile.c @@ -0,0 +1,542 @@ +/** + * @file xenoprofile.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon and Jose Renato Santos for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * + * Separated out arch-generic part + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../drivers/oprofile/event_buffer.h" + +#define MAX_XENOPROF_SAMPLES 16 + +/* sample buffers shared with Xen */ +static xenoprof_buf_t *xenoprof_buf[MAX_VIRT_CPUS]; +/* Shared buffer area */ +static struct xenoprof_shared_buffer shared_buffer; + +/* Passive sample buffers shared with Xen */ +static xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS]; +/* Passive shared buffer area */ +static struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS]; + +static int xenoprof_start(void); +static void xenoprof_stop(void); + +static int xenoprof_enabled = 0; +static int xenoprof_is_primary = 0; +static int active_defined; + +extern unsigned long oprofile_backtrace_depth; + +/* Number of buffers in shared area (one per VCPU) */ +static int nbuf; +/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */ +static int ovf_irq[NR_CPUS]; +/* cpu model type string - copied from Xen on XENOPROF_init command */ +static char cpu_type[XENOPROF_CPU_TYPE_SIZE]; + +#ifdef CONFIG_PM + +static int xenoprof_suspend(struct sys_device * dev, pm_message_t state) +{ + if (xenoprof_enabled == 1) + xenoprof_stop(); + return 0; +} + + +static int xenoprof_resume(struct sys_device * dev) +{ + if (xenoprof_enabled == 1) + xenoprof_start(); + return 0; +} + + +static struct sysdev_class oprofile_sysclass = { + .name = "oprofile", + .resume = xenoprof_resume, + .suspend = xenoprof_suspend +}; + + +static struct sys_device device_oprofile = { + .id = 0, + .cls = &oprofile_sysclass, +}; + + +static int __init init_driverfs(void) +{ + int error; + if (!(error = sysdev_class_register(&oprofile_sysclass))) + error = sysdev_register(&device_oprofile); + return error; +} + + +static void exit_driverfs(void) +{ + sysdev_unregister(&device_oprofile); + sysdev_class_unregister(&oprofile_sysclass); +} + +#else +#define init_driverfs() do { } while (0) +#define exit_driverfs() do { } while (0) +#endif /* CONFIG_PM */ + +static unsigned long long oprofile_samples; +static unsigned long long p_oprofile_samples; + +static unsigned int pdomains; +static struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS]; + +/* Check whether the given entry is an escape code */ +static int xenoprof_is_escape(xenoprof_buf_t * buf, int tail) +{ + return (buf->event_log[tail].eip == XENOPROF_ESCAPE_CODE); +} + +/* Get the event at the given entry */ +static uint8_t xenoprof_get_event(xenoprof_buf_t * buf, int tail) +{ + return (buf->event_log[tail].event); +} + +static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive) +{ + int head, tail, size; + int tracing = 0; + + head = buf->event_head; + tail = buf->event_tail; + size = buf->event_size; + + while (tail != head) { + if (xenoprof_is_escape(buf, tail) && + xenoprof_get_event(buf, tail) == XENOPROF_TRACE_BEGIN) { + tracing=1; + oprofile_add_mode(buf->event_log[tail].mode); + if (!is_passive) + oprofile_samples++; + else + p_oprofile_samples++; + + } else { + oprofile_add_pc(buf->event_log[tail].eip, + buf->event_log[tail].mode, + buf->event_log[tail].event); + if (!tracing) { + if (!is_passive) + oprofile_samples++; + else + p_oprofile_samples++; + } + + } + tail++; + if(tail==size) + tail=0; + } + buf->event_tail = tail; +} + +static void xenoprof_handle_passive(void) +{ + int i, j; + int flag_domain, flag_switch = 0; + + for (i = 0; i < pdomains; i++) { + flag_domain = 0; + for (j = 0; j < 
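xenoprof_add_pc() above drains one of the circular sample logs shared with Xen: event_tail chases event_head, wrapping at event_size, and the tail is written back once at the end so Xen sees the consumed space. Its skeleton, with deliver() as a hypothetical stand-in for the oprofile_add_mode()/oprofile_add_pc() calls:

    head = buf->event_head;		/* sample the producer index once */
    tail = buf->event_tail;
    while (tail != head) {
    	deliver(&buf->event_log[tail]);
    	if (++tail == buf->event_size)
    		tail = 0;		/* circular buffer wraps here */
    }
    buf->event_tail = tail;		/* return the space to Xen */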
passive_domains[i].nbuf; j++) { + xenoprof_buf_t *buf = p_xenoprof_buf[i][j]; + if (buf->event_head == buf->event_tail) + continue; + if (!flag_domain) { + if (!oprofile_add_domain_switch( + passive_domains[i].domain_id)) + goto done; + flag_domain = 1; + } + xenoprof_add_pc(buf, 1); + flag_switch = 1; + } + } +done: + if (flag_switch) + oprofile_add_domain_switch(COORDINATOR_DOMAIN); +} + +static irqreturn_t xenoprof_ovf_interrupt(int irq, void *dev_id) +{ + struct xenoprof_buf * buf; + static unsigned long flag; + + buf = xenoprof_buf[smp_processor_id()]; + + xenoprof_add_pc(buf, 0); + + if (xenoprof_is_primary && !test_and_set_bit(0, &flag)) { + xenoprof_handle_passive(); + smp_mb__before_clear_bit(); + clear_bit(0, &flag); + } + + return IRQ_HANDLED; +} + +static struct irqaction ovf_action = { + .handler = xenoprof_ovf_interrupt, + .flags = IRQF_DISABLED, + .name = "xenoprof" +}; + +static void unbind_virq(void) +{ + unsigned int i; + + for_each_online_cpu(i) { + if (ovf_irq[i] >= 0) { + unbind_from_per_cpu_irq(ovf_irq[i], i, &ovf_action); + ovf_irq[i] = -1; + } + } +} + + +static int bind_virq(void) +{ + unsigned int i; + int result; + + for_each_online_cpu(i) { + result = bind_virq_to_irqaction(VIRQ_XENOPROF, i, &ovf_action); + + if (result < 0) { + unbind_virq(); + return result; + } + + ovf_irq[i] = result; + } + + return 0; +} + + +static void unmap_passive_list(void) +{ + int i; + for (i = 0; i < pdomains; i++) + xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]); + pdomains = 0; +} + + +static int map_xenoprof_buffer(int max_samples) +{ + struct xenoprof_get_buffer get_buffer; + struct xenoprof_buf *buf; + int ret, i; + + if ( shared_buffer.buffer ) + return 0; + + get_buffer.max_samples = max_samples; + ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer); + if (ret) + return ret; + nbuf = get_buffer.nbuf; + + for (i=0; i< nbuf; i++) { + buf = (struct xenoprof_buf*) + &shared_buffer.buffer[i * get_buffer.bufsize]; + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); + xenoprof_buf[buf->vcpu_id] = buf; + } + + return 0; +} + + +static int xenoprof_setup(void) +{ + int ret; + + if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) ) + return ret; + + if ( (ret = bind_virq()) ) + return ret; + + if (xenoprof_is_primary) { + /* Define dom0 as an active domain if not done yet */ + if (!active_defined) { + domid_t domid; + ret = HYPERVISOR_xenoprof_op( + XENOPROF_reset_active_list, NULL); + if (ret) + goto err; + domid = 0; + ret = HYPERVISOR_xenoprof_op( + XENOPROF_set_active, &domid); + if (ret) + goto err; + active_defined = 1; + } + + if (oprofile_backtrace_depth > 0) { + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_backtrace, + &oprofile_backtrace_depth); + if (ret) + oprofile_backtrace_depth = 0; + } + + ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL); + if (ret) + goto err; + + xenoprof_arch_counter(); + ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL); + if (ret) + goto err; + } + + ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL); + if (ret) + goto err; + + xenoprof_enabled = 1; + return 0; + err: + unbind_virq(); + return ret; +} + + +static void xenoprof_shutdown(void) +{ + xenoprof_enabled = 0; + + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL)); + + if (xenoprof_is_primary) { + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_release_counters, + NULL)); + active_defined = 0; + } + + unbind_virq(); + + xenoprof_arch_unmap_shared_buffer(&shared_buffer); + if (xenoprof_is_primary) + unmap_passive_list(); +} + + +static int 
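The overflow interrupt handler above uses test_and_set_bit() so that only one CPU at a time walks the passive-domain buffers; the losers simply skip that work, and the bit is released with a barrier ordering the work before the clear. The guard in isolation:

    static unsigned long flag;

    if (!test_and_set_bit(0, &flag)) {	/* winner does the work */
    	xenoprof_handle_passive();
    	smp_mb__before_clear_bit();	/* work visible before release */
    	clear_bit(0, &flag);
    }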
xenoprof_start(void) +{ + int ret = 0; + + if (xenoprof_is_primary) + ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL); + if (!ret) + xenoprof_arch_start(); + return ret; +} + + +static void xenoprof_stop(void) +{ + if (xenoprof_is_primary) + WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL)); + xenoprof_arch_stop(); +} + + +static int xenoprof_set_active(int * active_domains, + unsigned int adomains) +{ + int ret = 0; + int i; + int set_dom0 = 0; + domid_t domid; + + if (!xenoprof_is_primary) + return 0; + + if (adomains > MAX_OPROF_DOMAINS) + return -E2BIG; + + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL); + if (ret) + return ret; + + for (i=0; i MAX_OPROF_DOMAINS) + return -E2BIG; + + ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL); + if (ret) + return ret; + unmap_passive_list(); + + for (i = 0; i < pdoms; i++) { + passive_domains[i].domain_id = p_domains[i]; + passive_domains[i].max_samples = 2048; + ret = xenoprof_arch_set_passive(&passive_domains[i], + &p_shared_buffer[i]); + if (ret) + goto out; + for (j = 0; j < passive_domains[i].nbuf; j++) { + buf = (struct xenoprof_buf *) + &p_shared_buffer[i].buffer[ + j * passive_domains[i].bufsize]; + BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS); + p_xenoprof_buf[i][buf->vcpu_id] = buf; + } + } + + pdomains = pdoms; + return 0; + +out: + for (j = 0; j < i; j++) + xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]); + + return ret; +} + + +/* The dummy backtrace function to keep oprofile happy + * The real backtrace is done in xen + */ +static void xenoprof_dummy_backtrace(struct pt_regs * const regs, + unsigned int depth) +{ + /* this should never be called */ + BUG(); + return; +} + + +static struct oprofile_operations xenoprof_ops = { +#ifdef HAVE_XENOPROF_CREATE_FILES + .create_files = xenoprof_create_files, +#endif + .set_active = xenoprof_set_active, + .set_passive = xenoprof_set_passive, + .setup = xenoprof_setup, + .shutdown = xenoprof_shutdown, + .start = xenoprof_start, + .stop = xenoprof_stop, + .backtrace = xenoprof_dummy_backtrace +}; + + +/* in order to get driverfs right */ +static int using_xenoprof; + +int __init xenoprofile_init(struct oprofile_operations * ops) +{ + struct xenoprof_init init; + unsigned int i; + int ret; + + ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init); + if (!ret) { + xenoprof_arch_init_counter(&init); + xenoprof_is_primary = init.is_primary; + + /* cpu_type is detected by Xen */ + cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0; + strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1); + xenoprof_ops.cpu_type = cpu_type; + + init_driverfs(); + using_xenoprof = 1; + *ops = xenoprof_ops; + + for (i=0; i + * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline int uncached_access(struct file *file) +{ + if (file->f_flags & O_SYNC) + return 1; + /* Xen sets correct MTRR type on non-RAM for us. */ + return 0; +} + +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ +#ifdef CONFIG_STRICT_DEVMEM + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) { + printk(KERN_INFO + "Program %s tried to access /dev/mem between %Lx->%Lx.\n", + current->comm, from, to); + return 0; + } + cursor += PAGE_SIZE; + pfn++; + } +#endif + return 1; +} + +/* + * This funcion reads the *physical* memory. 
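In read_mem() below, the size of the first chunk is computed as -p & (PAGE_SIZE - 1), which is exactly the number of bytes from p up to the next page boundary: for p = 0x1f40 and 4 KiB pages that is 0xc0, and for an aligned p it is 0, whereupon the code falls back to a full PAGE_SIZE. An equivalent, arguably clearer form of the same computation:

    sz = PAGE_SIZE - (p & (PAGE_SIZE - 1));	/* bytes left in p's page */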
The f_pos points directly to the + * memory location. + */ +static ssize_t read_mem(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos, ignored; + ssize_t read = 0, sz; + void __iomem *v; + + while (count > 0) { + /* + * Handle first page in case it's not aligned + */ + if (-p & (PAGE_SIZE - 1)) + sz = -p & (PAGE_SIZE - 1); + else + sz = PAGE_SIZE; + + sz = min_t(unsigned long, sz, count); + + if (!range_is_allowed(p >> PAGE_SHIFT, count)) + return -EPERM; + + v = ioremap(p, sz); + if (IS_ERR(v) || v == NULL) { + /* + * Some programs (e.g., dmidecode) groove off into + * weird RAM areas where no tables can possibly exist + * (because Xen will have stomped on them!). These + * programs get rather upset if we let them know that + * Xen failed their access, so we fake out a read of + * all zeroes. + */ + if (clear_user(buf, count)) + return -EFAULT; + read += count; + break; + } + + ignored = copy_to_user(buf, v, sz); + iounmap(v); + if (ignored) + return -EFAULT; + buf += sz; + p += sz; + count -= sz; + read += sz; + } + + *ppos += read; + return read; +} + +static ssize_t write_mem(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos, ignored; + ssize_t written = 0, sz; + void __iomem *v; + + while (count > 0) { + /* + * Handle first page in case it's not aligned + */ + if (-p & (PAGE_SIZE - 1)) + sz = -p & (PAGE_SIZE - 1); + else + sz = PAGE_SIZE; + + sz = min_t(unsigned long, sz, count); + + if (!range_is_allowed(p >> PAGE_SHIFT, sz)) + return -EPERM; + + v = ioremap(p, sz); + if (v == NULL) + break; + if (IS_ERR(v)) { + if (written == 0) + return PTR_ERR(v); + break; + } + + ignored = copy_from_user(v, buf, sz); + iounmap(v); + if (ignored) { + written += sz - ignored; + if (written) + break; + return -EFAULT; + } + buf += sz; + p += sz; + count -= sz; + written += sz; + } + + *ppos += written; + return written; +} + +#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM +static struct vm_operations_struct mmap_mem_ops = { +#ifdef CONFIG_HAVE_IOREMAP_PROT + .access = generic_access_phys +#endif +}; + +static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (uncached_access(file)) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (!range_is_allowed(vma->vm_pgoff, size)) + return -EPERM; + + if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, + &vma->vm_page_prot)) + return -EINVAL; + + vma->vm_ops = &mmap_mem_ops; + + /* We want to return the real error code, not EAGAIN. */ + return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + size, vma->vm_page_prot, DOMID_IO); +} +#endif + +/* + * The memory devices use the full 32/64 bits of the offset, and so we cannot + * check against negative addresses: they are ok. The return value is weird, + * though, in that case (0). + * + * also note that seeking relative to the "end of file" isn't supported: + * it has no meaning, so it returns -EINVAL. 
+ */ +static loff_t memory_lseek(struct file * file, loff_t offset, int orig) +{ + loff_t ret; + + mutex_lock(&file->f_path.dentry->d_inode->i_mutex); + switch (orig) { + case 0: + file->f_pos = offset; + ret = file->f_pos; + force_successful_syscall_return(); + break; + case 1: + file->f_pos += offset; + ret = file->f_pos; + force_successful_syscall_return(); + break; + default: + ret = -EINVAL; + } + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + return ret; +} + +static int open_mem(struct inode * inode, struct file * filp) +{ + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +const struct file_operations mem_fops = { + .llseek = memory_lseek, + .read = read_mem, + .write = write_mem, + .mmap = xen_mmap_mem, + .open = open_mem, +}; --- linux-ec2-2.6.31.orig/drivers/xen/char/Makefile +++ linux-ec2-2.6.31/drivers/xen/char/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_XEN_DEVMEM) := mem.o --- linux-ec2-2.6.31.orig/drivers/xen/tpmback/interface.c +++ linux-ec2-2.6.31/drivers/xen/tpmback/interface.c @@ -0,0 +1,170 @@ + /***************************************************************************** + * drivers/xen/tpmback/interface.c + * + * Vritual TPM interface management. + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@us.ibm.com + * + * This code has been derived from drivers/xen/netback/interface.c + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" +#include +#include +#include + +static struct kmem_cache *tpmif_cachep; +int num_frontends = 0; + +LIST_HEAD(tpmif_list); + +static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi) +{ + tpmif_t *tpmif; + + tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL); + if (tpmif == NULL) + goto out_of_memory; + + memset(tpmif, 0, sizeof (*tpmif)); + tpmif->domid = domid; + tpmif->status = DISCONNECTED; + tpmif->bi = bi; + snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid); + atomic_set(&tpmif->refcnt, 1); + + tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE); + if (tpmif->mmap_pages == NULL) + goto out_of_memory; + + list_add(&tpmif->tpmif_list, &tpmif_list); + num_frontends++; + + return tpmif; + + out_of_memory: + if (tpmif != NULL) + kmem_cache_free(tpmif_cachep, tpmif); + printk("%s: out of memory\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); +} + +static void free_tpmif(tpmif_t * tpmif) +{ + num_frontends--; + list_del(&tpmif->tpmif_list); + free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE); + kmem_cache_free(tpmif_cachep, tpmif); +} + +tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi) +{ + tpmif_t *tpmif; + + list_for_each_entry(tpmif, &tpmif_list, tpmif_list) { + if (tpmif->bi == bi) { + if (tpmif->domid == domid) { + tpmif_get(tpmif); + return tpmif; + } else { + return ERR_PTR(-EEXIST); + } + } + } + + return alloc_tpmif(domid, bi); +} + +static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)tpmif->tx_area->addr, + GNTMAP_host_map, shared_page, tpmif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + tpmif->shmem_ref = shared_page; + tpmif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(tpmif_t *tpmif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)tpmif->tx_area->addr, + GNTMAP_host_map, tpmif->shmem_handle); + + if 
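map_frontend_page() in tpmback/interface.c below shows the canonical backend-side grant mapping: the hypercall either fails outright (a driver bug, hence BUG()) or returns a per-op status for a bad or busy grant. The map/unmap pair reduced to its essentials, where area, gref and otherend_domid stand in for tx_area, shared_page and tpmif->domid:

    struct gnttab_map_grant_ref map;
    struct gnttab_unmap_grant_ref unmap;

    gnttab_set_map_op(&map, (unsigned long)area->addr,
    		  GNTMAP_host_map, gref, otherend_domid);
    if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1))
    	BUG();				/* hypercall itself failed */
    if (map.status)
    	return map.status;		/* grant rejected by Xen */

    /* ... later, tear the mapping down again ... */
    gnttab_set_unmap_op(&unmap, (unsigned long)area->addr,
    		    GNTMAP_host_map, map.handle);
    if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1))
    	BUG();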
(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn) +{ + int err; + + if (tpmif->irq) + return 0; + + if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL) + return -ENOMEM; + + err = map_frontend_page(tpmif, shared_page); + if (err) { + free_vm_area(tpmif->tx_area); + return err; + } + + tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr; + memset(tpmif->tx, 0, PAGE_SIZE); + + err = bind_interdomain_evtchn_to_irqhandler( + tpmif->domid, evtchn, tpmif_be_int, 0, tpmif->devname, tpmif); + if (err < 0) { + unmap_frontend_page(tpmif); + free_vm_area(tpmif->tx_area); + return err; + } + tpmif->irq = err; + + tpmif->shmem_ref = shared_page; + tpmif->active = 1; + + return 0; +} + +void tpmif_disconnect_complete(tpmif_t *tpmif) +{ + if (tpmif->irq) + unbind_from_irqhandler(tpmif->irq, tpmif); + + if (tpmif->tx) { + unmap_frontend_page(tpmif); + free_vm_area(tpmif->tx_area); + } + + free_tpmif(tpmif); +} + +int __init tpmif_interface_init(void) +{ + tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t), + 0, 0, NULL); + return tpmif_cachep ? 0 : -ENOMEM; +} + +void tpmif_interface_exit(void) +{ + kmem_cache_destroy(tpmif_cachep); +} --- linux-ec2-2.6.31.orig/drivers/xen/tpmback/xenbus.c +++ linux-ec2-2.6.31/drivers/xen/tpmback/xenbus.c @@ -0,0 +1,288 @@ +/* Xenbus code for tpmif backend + Copyright (C) 2005 IBM Corporation + Copyright (C) 2005 Rusty Russell + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#include +#include "common.h" + +struct backend_info +{ + struct xenbus_device *dev; + + /* our communications channel */ + tpmif_t *tpmif; + + long int frontend_id; + long int instance; // instance of TPM + u8 is_instance_set;// whether instance number has been set + + /* watch front end for changes */ + struct xenbus_watch backend_watch; +}; + +static void maybe_connect(struct backend_info *be); +static void connect(struct backend_info *be); +static int connect_ring(struct backend_info *be); +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len); +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state); + +long int tpmback_get_instance(struct backend_info *bi) +{ + long int res = -1; + if (bi && bi->is_instance_set) + res = bi->instance; + return res; +} + +static int tpmback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + if (!be) return 0; + + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + if (be->tpmif) { + be->tpmif->bi = NULL; + vtpm_release_packets(be->tpmif, 0); + tpmif_put(be->tpmif); + be->tpmif = NULL; + } + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + return 0; +} + +static int tpmback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + + be->is_instance_set = 0; + be->dev = dev; + dev_set_drvdata(&dev->dev, be); + + err = xenbus_watch_path2(dev, dev->nodename, + "instance", &be->backend_watch, + backend_changed); + if (err) { + goto fail; + } + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) { + goto fail; + } + return 0; +fail: + tpmback_remove(dev); + return err; +} + + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + long instance; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + + err = xenbus_scanf(XBT_NIL, dev->nodename, + "instance","%li", &instance); + if (XENBUS_EXIST_ERR(err)) { + return; + } + + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading instance"); + return; + } + + if (be->is_instance_set == 0) { + be->instance = instance; + be->is_instance_set = 1; + } +} + + +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + int err; + + switch (frontend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + break; + + case XenbusStateConnected: + err = connect_ring(be); + if (err) { + return; + } + maybe_connect(be); + break; + + case XenbusStateClosing: + be->instance = -1; + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateUnknown: /* keep it here */ + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + device_unregister(&be->dev->dev); + tpmback_remove(dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, + "saw state %d at frontend", + frontend_state); + break; + } 
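Taken together, the switch above implements the backend half of the xenbus handshake for vTPM; in tabular form:

    frontend moves to      backend reaction
    Initialising/-ed       nothing (stay in InitWait)
    Connected              connect_ring(), then maybe_connect()
    Closing                mark instance dead, switch to Closing
    Closed/Unknown         switch to Closed, unregister and clean up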
+} + + + +static void maybe_connect(struct backend_info *be) +{ + if (be->tpmif == NULL || be->tpmif->status == CONNECTED) + return; + + connect(be); +} + + +static void connect(struct backend_info *be) +{ + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = be->dev; + unsigned long ready = 1; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(be->dev, err, "starting transaction"); + return; + } + + err = xenbus_printf(xbt, be->dev->nodename, + "ready", "%lu", ready); + if (err) { + xenbus_dev_fatal(be->dev, err, "writing 'ready'"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(be->dev, err, "end of transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (!err) + be->tpmif->status = CONNECTED; + return; +abort: + xenbus_transaction_end(xbt, 1); +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + int err; + + err = xenbus_gather(XBT_NIL, dev->otherend, + "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_error(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + if (!be->tpmif) { + be->tpmif = tpmif_find(dev->otherend_id, be); + if (IS_ERR(be->tpmif)) { + err = PTR_ERR(be->tpmif); + be->tpmif = NULL; + xenbus_dev_fatal(dev,err,"creating vtpm interface"); + return err; + } + } + + if (be->tpmif != NULL) { + err = tpmif_map(be->tpmif, ring_ref, evtchn); + if (err) { + xenbus_dev_error(dev, err, + "mapping shared-frame %lu port %u", + ring_ref, evtchn); + return err; + } + } + return 0; +} + + +static const struct xenbus_device_id tpmback_ids[] = { + { "vtpm" }, + { "" } +}; + + +static struct xenbus_driver tpmback = { + .name = "vtpm", + .ids = tpmback_ids, + .probe = tpmback_probe, + .remove = tpmback_remove, + .otherend_changed = frontend_changed, +}; + + +int tpmif_xenbus_init(void) +{ + return xenbus_register_backend(&tpmback); +} + +void tpmif_xenbus_exit(void) +{ + xenbus_unregister_driver(&tpmback); +} --- linux-ec2-2.6.31.orig/drivers/xen/tpmback/tpmback.c +++ linux-ec2-2.6.31/drivers/xen/tpmback/tpmback.c @@ -0,0 +1,952 @@ +/****************************************************************************** + * drivers/xen/tpmback/tpmback.c + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@us.ibm.com + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from drivers/xen/netback/netback.c + * Copyright (c) 2002-2004, K A Fraser + * + */ + +#include "common.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* local data structures */ +struct data_exchange { + struct list_head pending_pak; + struct list_head current_pak; + unsigned int copied_so_far; + u8 has_opener:1; + u8 aborted:1; + rwlock_t pak_lock; // protects all of the previous fields + wait_queue_head_t wait_queue; +}; + +struct vtpm_resp_hdr { + uint32_t instance_no; + uint16_t tag_no; + uint32_t len_no; + uint32_t ordinal_no; +} __attribute__ ((packed)); + +struct packet { + struct list_head next; + unsigned int data_len; + u8 *data_buffer; + tpmif_t *tpmif; + u32 tpm_instance; + u8 req_tag; + u32 last_read; + u8 flags; + struct timer_list processing_timer; +}; + +enum { + PACKET_FLAG_DISCARD_RESPONSE = 1, +}; + +/* local variables */ +static struct data_exchange dataex; + +/* local 
function prototypes */
+static int _packet_write(struct packet *pak,
+			 const char *data, size_t size, int userbuffer);
+static void processing_timeout(unsigned long ptr);
+static int packet_read_shmem(struct packet *pak,
+			     tpmif_t * tpmif,
+			     u32 offset,
+			     char *buffer, int isuserbuffer, u32 left);
+static int vtpm_queue_packet(struct packet *pak);
+
+/***************************************************************
+ Buffer copying for user and kernel space buffers.
+***************************************************************/
+static inline int copy_from_buffer(void *to,
+				   const void *from, unsigned long size,
+				   int isuserbuffer)
+{
+	if (isuserbuffer) {
+		if (copy_from_user(to, (void __user *)from, size))
+			return -EFAULT;
+	} else {
+		memcpy(to, from, size);
+	}
+	return 0;
+}
+
+static inline int copy_to_buffer(void *to,
+				 const void *from, unsigned long size,
+				 int isuserbuffer)
+{
+	if (isuserbuffer) {
+		if (copy_to_user((void __user *)to, from, size))
+			return -EFAULT;
+	} else {
+		memcpy(to, from, size);
+	}
+	return 0;
+}
+
+
+static void dataex_init(struct data_exchange *dataex)
+{
+	INIT_LIST_HEAD(&dataex->pending_pak);
+	INIT_LIST_HEAD(&dataex->current_pak);
+	dataex->has_opener = 0;
+	rwlock_init(&dataex->pak_lock);
+	init_waitqueue_head(&dataex->wait_queue);
+}
+
+/***************************************************************
+ Packet-related functions
+***************************************************************/
+
+static struct packet *packet_find_instance(struct list_head *head,
+					   u32 tpm_instance)
+{
+	struct packet *pak;
+	struct list_head *p;
+
+	/*
+	 * traverse the list of packets and return the first
+	 * one with the given instance number
+	 */
+	list_for_each(p, head) {
+		pak = list_entry(p, struct packet, next);
+
+		if (pak->tpm_instance == tpm_instance) {
+			return pak;
+		}
+	}
+	return NULL;
+}
+
+static struct packet *packet_find_packet(struct list_head *head, void *packet)
+{
+	struct packet *pak;
+	struct list_head *p;
+
+	/*
+	 * traverse the list of packets and return the one
+	 * matching the given packet pointer
+	 */
+	list_for_each(p, head) {
+		pak = list_entry(p, struct packet, next);
+
+		if (pak == packet) {
+			return pak;
+		}
+	}
+	return NULL;
+}
+
+static struct packet *packet_alloc(tpmif_t * tpmif,
+				   u32 size, u8 req_tag, u8 flags)
+{
+	struct packet *pak = NULL;
+	pak = kzalloc(sizeof (struct packet), GFP_ATOMIC);
+	if (NULL != pak) {
+		if (tpmif) {
+			pak->tpmif = tpmif;
+			pak->tpm_instance = tpmback_get_instance(tpmif->bi);
+			tpmif_get(tpmif);
+		}
+		pak->data_len = size;
+		pak->req_tag = req_tag;
+		pak->last_read = 0;
+		pak->flags = flags;
+
+		/*
+		 * cannot do tpmif_get(tpmif); bad things happen
+		 * on the last tpmif_put()
+		 */
+		init_timer(&pak->processing_timer);
+		pak->processing_timer.function = processing_timeout;
+		pak->processing_timer.data = (unsigned long)pak;
+	}
+	return pak;
+}
+
+static inline void packet_reset(struct packet *pak)
+{
+	pak->last_read = 0;
+}
+
+static void packet_free(struct packet *pak)
+{
+	if (timer_pending(&pak->processing_timer)) {
+		BUG();
+	}
+
+	if (pak->tpmif)
+		tpmif_put(pak->tpmif);
+	kfree(pak->data_buffer);
+	/*
+	 * cannot do tpmif_put(pak->tpmif); bad things happen
+	 * on the last tpmif_put()
+	 */
+	kfree(pak);
+}
+
+
+/*
+ * Write data to the shared memory and send it to the FE.
+ */
+static int packet_write(struct packet *pak,
+			const char *data, size_t size, int isuserbuffer)
+{
+	int rc = 0;
+
+	if (0 != (pak->flags & PACKET_FLAG_DISCARD_RESPONSE)) {
+		/* Don't send a response to this packet.
Just acknowledge it. */ + rc = size; + } else { + rc = _packet_write(pak, data, size, isuserbuffer); + } + + return rc; +} + +int _packet_write(struct packet *pak, + const char *data, size_t size, int isuserbuffer) +{ + /* + * Write into the shared memory pages directly + * and send it to the front end. + */ + tpmif_t *tpmif = pak->tpmif; + grant_handle_t handle; + int rc = 0; + unsigned int i = 0; + unsigned int offset = 0; + + if (tpmif == NULL) { + return -EFAULT; + } + + if (tpmif->status == DISCONNECTED) { + return size; + } + + while (offset < size && i < TPMIF_TX_RING_SIZE) { + unsigned int tocopy; + struct gnttab_map_grant_ref map_op; + struct gnttab_unmap_grant_ref unmap_op; + tpmif_tx_request_t *tx; + + tx = &tpmif->tx->ring[i].req; + + if (0 == tx->addr) { + DPRINTK("ERROR: Buffer for outgoing packet NULL?! i=%d\n", i); + return 0; + } + + gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i), + GNTMAP_host_map, tx->ref, tpmif->domid); + + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + &map_op, 1))) { + BUG(); + } + + handle = map_op.handle; + + if (map_op.status) { + DPRINTK(" Grant table operation failure !\n"); + return 0; + } + + tocopy = min_t(size_t, size - offset, PAGE_SIZE); + + if (copy_from_buffer((void *)(idx_to_kaddr(tpmif, i) | + (tx->addr & ~PAGE_MASK)), + &data[offset], tocopy, isuserbuffer)) { + tpmif_put(tpmif); + return -EFAULT; + } + tx->size = tocopy; + + gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i), + GNTMAP_host_map, handle); + + if (unlikely + (HYPERVISOR_grant_table_op + (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) { + BUG(); + } + + offset += tocopy; + i++; + } + + rc = offset; + DPRINTK("Notifying frontend via irq %d\n", tpmif->irq); + notify_remote_via_irq(tpmif->irq); + + return rc; +} + +/* + * Read data from the shared memory and copy it directly into the + * provided buffer. Advance the read_last indicator which tells + * how many bytes have already been read. + */ +static int packet_read(struct packet *pak, size_t numbytes, + char *buffer, size_t buffersize, int isuserbuffer) +{ + tpmif_t *tpmif = pak->tpmif; + + /* + * Read 'numbytes' of data from the buffer. The first 4 + * bytes are the instance number in network byte order, + * after that come the data from the shared memory buffer. + */ + u32 to_copy; + u32 offset = 0; + u32 room_left = buffersize; + + if (pak->last_read < 4) { + /* + * copy the instance number into the buffer + */ + u32 instance_no = htonl(pak->tpm_instance); + u32 last_read = pak->last_read; + + to_copy = min_t(size_t, 4 - last_read, numbytes); + + if (copy_to_buffer(&buffer[0], + &(((u8 *) & instance_no)[last_read]), + to_copy, isuserbuffer)) { + return -EFAULT; + } + + pak->last_read += to_copy; + offset += to_copy; + room_left -= to_copy; + } + + /* + * If the packet has a data buffer appended, read from it... 
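+ * otherwise packet_read_shmem() copies it straight from the granted pages.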
+ */ + + if (room_left > 0) { + if (pak->data_buffer) { + u32 to_copy = min_t(u32, pak->data_len - offset, room_left); + u32 last_read = pak->last_read - 4; + + if (copy_to_buffer(&buffer[offset], + &pak->data_buffer[last_read], + to_copy, isuserbuffer)) { + return -EFAULT; + } + pak->last_read += to_copy; + offset += to_copy; + } else { + offset = packet_read_shmem(pak, + tpmif, + offset, + buffer, + isuserbuffer, room_left); + } + } + return offset; +} + +static int packet_read_shmem(struct packet *pak, + tpmif_t * tpmif, + u32 offset, char *buffer, int isuserbuffer, + u32 room_left) +{ + u32 last_read = pak->last_read - 4; + u32 i = (last_read / PAGE_SIZE); + u32 pg_offset = last_read & (PAGE_SIZE - 1); + u32 to_copy; + grant_handle_t handle; + + tpmif_tx_request_t *tx; + + tx = &tpmif->tx->ring[0].req; + /* + * Start copying data at the page with index 'index' + * and within that page at offset 'offset'. + * Copy a maximum of 'room_left' bytes. + */ + to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left); + while (to_copy > 0) { + void *src; + struct gnttab_map_grant_ref map_op; + struct gnttab_unmap_grant_ref unmap_op; + + tx = &tpmif->tx->ring[i].req; + + gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i), + GNTMAP_host_map, tx->ref, tpmif->domid); + + if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + &map_op, 1))) { + BUG(); + } + + if (map_op.status) { + DPRINTK(" Grant table operation failure !\n"); + return -EFAULT; + } + + handle = map_op.handle; + + if (to_copy > tx->size) { + /* + * User requests more than what's available + */ + to_copy = min_t(u32, tx->size, to_copy); + } + + DPRINTK("Copying from mapped memory at %08lx\n", + (unsigned long)(idx_to_kaddr(tpmif, i) | + (tx->addr & ~PAGE_MASK))); + + src = (void *)(idx_to_kaddr(tpmif, i) | + ((tx->addr & ~PAGE_MASK) + pg_offset)); + if (copy_to_buffer(&buffer[offset], + src, to_copy, isuserbuffer)) { + return -EFAULT; + } + + DPRINTK("Data from TPM-FE of domain %d are %d %d %d %d\n", + tpmif->domid, buffer[offset], buffer[offset + 1], + buffer[offset + 2], buffer[offset + 3]); + + gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i), + GNTMAP_host_map, handle); + + if (unlikely + (HYPERVISOR_grant_table_op + (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) { + BUG(); + } + + offset += to_copy; + pg_offset = 0; + last_read += to_copy; + room_left -= to_copy; + + to_copy = min_t(u32, PAGE_SIZE, room_left); + i++; + } /* while (to_copy > 0) */ + /* + * Adjust the last_read pointer + */ + pak->last_read = last_read + 4; + return offset; +} + +/* ============================================================ + * The file layer for reading data from this device + * ============================================================ + */ +static int vtpm_op_open(struct inode *inode, struct file *f) +{ + int rc = 0; + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + if (dataex.has_opener == 0) { + dataex.has_opener = 1; + } else { + rc = -EPERM; + } + write_unlock_irqrestore(&dataex.pak_lock, flags); + return rc; +} + +static ssize_t vtpm_op_read(struct file *file, + char __user * data, size_t size, loff_t * offset) +{ + int ret_size = -ENODATA; + struct packet *pak = NULL; + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + if (dataex.aborted) { + dataex.aborted = 0; + dataex.copied_so_far = 0; + write_unlock_irqrestore(&dataex.pak_lock, flags); + return -EIO; + } + + if (list_empty(&dataex.pending_pak)) { + write_unlock_irqrestore(&dataex.pak_lock, flags); + 
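/* Nothing pending: block until a frontend queues a packet. */ +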
wait_event_interruptible(dataex.wait_queue, + !list_empty(&dataex.pending_pak)); + write_lock_irqsave(&dataex.pak_lock, flags); + dataex.copied_so_far = 0; + } + + if (!list_empty(&dataex.pending_pak)) { + unsigned int left; + + pak = list_entry(dataex.pending_pak.next, struct packet, next); + left = pak->data_len - dataex.copied_so_far; + list_del(&pak->next); + write_unlock_irqrestore(&dataex.pak_lock, flags); + + DPRINTK("size given by app: %zu, available: %u\n", size, left); + + ret_size = min_t(size_t, size, left); + + ret_size = packet_read(pak, ret_size, data, size, 1); + + write_lock_irqsave(&dataex.pak_lock, flags); + + if (ret_size < 0) { + del_singleshot_timer_sync(&pak->processing_timer); + packet_free(pak); + dataex.copied_so_far = 0; + } else { + DPRINTK("Copied %d bytes to user buffer\n", ret_size); + + dataex.copied_so_far += ret_size; + if (dataex.copied_so_far >= pak->data_len + 4) { + DPRINTK("All data from this packet given to app.\n"); + /* All data given to app */ + + del_singleshot_timer_sync(&pak-> + processing_timer); + list_add_tail(&pak->next, &dataex.current_pak); + /* + * The more frontends that are handled at the same time, + * the more time we give the TPM to process the request. + */ + mod_timer(&pak->processing_timer, + jiffies + (num_frontends * 60 * HZ)); + dataex.copied_so_far = 0; + } else { + list_add(&pak->next, &dataex.pending_pak); + } + } + } + write_unlock_irqrestore(&dataex.pak_lock, flags); + + DPRINTK("Returning result from read to app: %d\n", ret_size); + + return ret_size; +} + +/* + * Write operation - only works after a previous read operation! + */ +static ssize_t vtpm_op_write(struct file *file, + const char __user * data, size_t size, + loff_t * offset) +{ + struct packet *pak; + int rc = 0; + unsigned int off = 4; + unsigned long flags; + struct vtpm_resp_hdr vrh; + + /* + * Minimum required packet size is: + * 4 bytes for instance number + * 2 bytes for tag + * 4 bytes for paramSize + * 4 bytes for the ordinal + * sum: 14 bytes + */ + if (size < sizeof (vrh)) + return -EFAULT; + + if (copy_from_user(&vrh, data, sizeof (vrh))) + return -EFAULT; + + /* malformed packet? */ + if ((off + ntohl(vrh.len_no)) != size) + return -EFAULT; + + write_lock_irqsave(&dataex.pak_lock, flags); + pak = packet_find_instance(&dataex.current_pak, + ntohl(vrh.instance_no)); + + if (pak == NULL) { + write_unlock_irqrestore(&dataex.pak_lock, flags); + DPRINTK(KERN_ALERT "No associated packet! (inst=%d)\n", + ntohl(vrh.instance_no)); + return -EFAULT; + } + + del_singleshot_timer_sync(&pak->processing_timer); + list_del(&pak->next); + + write_unlock_irqrestore(&dataex.pak_lock, flags); + + /* + * The first 'offset' bytes must be the instance number - skip them.
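+ * ('off' is 4, the length of the instance number field.)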
+ */ + size -= off; + + rc = packet_write(pak, &data[off], size, 1); + + if (rc > 0) { + /* I neglected the first 4 bytes */ + rc += off; + } + packet_free(pak); + return rc; +} + +static int vtpm_op_release(struct inode *inode, struct file *file) +{ + unsigned long flags; + + vtpm_release_packets(NULL, 1); + write_lock_irqsave(&dataex.pak_lock, flags); + dataex.has_opener = 0; + write_unlock_irqrestore(&dataex.pak_lock, flags); + return 0; +} + +static unsigned int vtpm_op_poll(struct file *file, + struct poll_table_struct *pts) +{ + unsigned int flags = POLLOUT | POLLWRNORM; + + poll_wait(file, &dataex.wait_queue, pts); + if (!list_empty(&dataex.pending_pak)) { + flags |= POLLIN | POLLRDNORM; + } + return flags; +} + +static const struct file_operations vtpm_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = vtpm_op_open, + .read = vtpm_op_read, + .write = vtpm_op_write, + .release = vtpm_op_release, + .poll = vtpm_op_poll, +}; + +static struct miscdevice vtpms_miscdevice = { + .minor = 225, + .name = "vtpm", + .fops = &vtpm_ops, +}; + +/*************************************************************** + Utility functions +***************************************************************/ + +static int tpm_send_fail_message(struct packet *pak, u8 req_tag) +{ + int rc; + static const unsigned char tpm_error_message_fail[] = { + 0x00, 0x00, + 0x00, 0x00, 0x00, 0x0a, + 0x00, 0x00, 0x00, 0x09 /* TPM_FAIL */ + }; + unsigned char buffer[sizeof (tpm_error_message_fail)]; + + memcpy(buffer, tpm_error_message_fail, + sizeof (tpm_error_message_fail)); + /* + * Insert the right response tag depending on the given tag + * All response tags are '+3' to the request tag. + */ + buffer[1] = req_tag + 3; + + /* + * Write the data to shared memory and notify the front-end + */ + rc = packet_write(pak, buffer, sizeof (buffer), 0); + + return rc; +} + +static int _vtpm_release_packets(struct list_head *head, + tpmif_t * tpmif, int send_msgs) +{ + int aborted = 0; + int c = 0; + struct packet *pak; + struct list_head *pos, *tmp; + + list_for_each_safe(pos, tmp, head) { + pak = list_entry(pos, struct packet, next); + c += 1; + + if (tpmif == NULL || pak->tpmif == tpmif) { + int can_send = 0; + + del_singleshot_timer_sync(&pak->processing_timer); + list_del(&pak->next); + + if (pak->tpmif && pak->tpmif->status == CONNECTED) { + can_send = 1; + } + + if (send_msgs && can_send) { + tpm_send_fail_message(pak, pak->req_tag); + } + packet_free(pak); + if (c == 1) + aborted = 1; + } + } + return aborted; +} + +int vtpm_release_packets(tpmif_t * tpmif, int send_msgs) +{ + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + + dataex.aborted = _vtpm_release_packets(&dataex.pending_pak, + tpmif, + send_msgs); + _vtpm_release_packets(&dataex.current_pak, tpmif, send_msgs); + + write_unlock_irqrestore(&dataex.pak_lock, flags); + return 0; +} + +static int vtpm_queue_packet(struct packet *pak) +{ + int rc = 0; + + if (dataex.has_opener) { + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + list_add_tail(&pak->next, &dataex.pending_pak); + /* give the TPM some time to pick up the request */ + mod_timer(&pak->processing_timer, jiffies + (30 * HZ)); + write_unlock_irqrestore(&dataex.pak_lock, flags); + + wake_up_interruptible(&dataex.wait_queue); + } else { + rc = -EFAULT; + } + return rc; +} + +static int vtpm_receive(tpmif_t * tpmif, u32 size) +{ + int rc = 0; + unsigned char buffer[10]; + __be32 *native_size; + struct packet *pak = packet_alloc(tpmif, size, 0, 0); + + if 
(!pak) + return -ENOMEM; + /* + * Read 10 bytes from the received buffer to test its + * content for validity. + */ + if (sizeof (buffer) != packet_read(pak, + sizeof (buffer), buffer, + sizeof (buffer), 0)) { + goto failexit; + } + /* + * Reset the packet read pointer so we can read all its + * contents again. + */ + packet_reset(pak); + + native_size = (__force __be32 *) (&buffer[4 + 2]); + /* + * Verify that the size of the packet is correct + * as indicated and that there's actually someone reading packets. + * The minimum size of the packet is '10' for tag, size indicator + * and ordinal. + */ + if (size < 10 || + be32_to_cpu(*native_size) != size || + 0 == dataex.has_opener || tpmif->status != CONNECTED) { + rc = -EINVAL; + goto failexit; + } else { + rc = vtpm_queue_packet(pak); + if (rc < 0) + goto failexit; + } + return 0; + + failexit: + if (pak) { + tpm_send_fail_message(pak, buffer[4 + 1]); + packet_free(pak); + } + return rc; +} + +/* + * Timeout function that gets invoked when a packet has not been processed + * during the timeout period. + * The packet must be on a list when this function is invoked. This + * also means that once it's taken off a list, the timer must be + * destroyed as well. + */ +static void processing_timeout(unsigned long ptr) +{ + struct packet *pak = (struct packet *)ptr; + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + /* + * Check whether the packet + * is still on one of the lists. + */ + if (pak == packet_find_packet(&dataex.pending_pak, pak) || + pak == packet_find_packet(&dataex.current_pak, pak)) { + if ((pak->flags & PACKET_FLAG_DISCARD_RESPONSE) == 0) { + tpm_send_fail_message(pak, pak->req_tag); + } + /* discard future responses */ + pak->flags |= PACKET_FLAG_DISCARD_RESPONSE; + } + + write_unlock_irqrestore(&dataex.pak_lock, flags); +} + +static void tpm_tx_action(unsigned long unused); +static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0); + +static struct list_head tpm_schedule_list; +static spinlock_t tpm_schedule_list_lock; + +static inline void maybe_schedule_tx_action(void) +{ + smp_mb(); + tasklet_schedule(&tpm_tx_tasklet); +} + +static inline int __on_tpm_schedule_list(tpmif_t * tpmif) +{ + return tpmif->list.next != NULL; +} + +static void remove_from_tpm_schedule_list(tpmif_t * tpmif) +{ + spin_lock_irq(&tpm_schedule_list_lock); + if (likely(__on_tpm_schedule_list(tpmif))) { + list_del(&tpmif->list); + tpmif->list.next = NULL; + tpmif_put(tpmif); + } + spin_unlock_irq(&tpm_schedule_list_lock); +} + +static void add_to_tpm_schedule_list_tail(tpmif_t * tpmif) +{ + if (__on_tpm_schedule_list(tpmif)) + return; + + spin_lock_irq(&tpm_schedule_list_lock); + if (!__on_tpm_schedule_list(tpmif) && tpmif->active) { + list_add_tail(&tpmif->list, &tpm_schedule_list); + tpmif_get(tpmif); + } + spin_unlock_irq(&tpm_schedule_list_lock); +} + +void tpmif_schedule_work(tpmif_t * tpmif) +{ + add_to_tpm_schedule_list_tail(tpmif); + maybe_schedule_tx_action(); +} + +void tpmif_deschedule_work(tpmif_t * tpmif) +{ + remove_from_tpm_schedule_list(tpmif); +} + +static void tpm_tx_action(unsigned long unused) +{ + struct list_head *ent; + tpmif_t *tpmif; + tpmif_tx_request_t *tx; + + DPRINTK("%s: Getting data from front-end(s)!\n", __FUNCTION__); + + while (!list_empty(&tpm_schedule_list)) { + /* Get a tpmif from the list with work to do.
*/ + ent = tpm_schedule_list.next; + tpmif = list_entry(ent, tpmif_t, list); + tpmif_get(tpmif); + remove_from_tpm_schedule_list(tpmif); + + tx = &tpmif->tx->ring[0].req; + + /* pass it up */ + vtpm_receive(tpmif, tx->size); + + tpmif_put(tpmif); + } +} + +irqreturn_t tpmif_be_int(int irq, void *dev_id) +{ + tpmif_t *tpmif = (tpmif_t *) dev_id; + + add_to_tpm_schedule_list_tail(tpmif); + maybe_schedule_tx_action(); + return IRQ_HANDLED; +} + +static int __init tpmback_init(void) +{ + int rc; + + if ((rc = misc_register(&vtpms_miscdevice)) != 0) { + printk(KERN_ALERT + "Could not register misc device for TPM BE.\n"); + return rc; + } + + dataex_init(&dataex); + + spin_lock_init(&tpm_schedule_list_lock); + INIT_LIST_HEAD(&tpm_schedule_list); + + rc = tpmif_interface_init(); + if (!rc) { + rc = tpmif_xenbus_init(); + if (rc) + tpmif_interface_exit(); + } + if (rc) { + misc_deregister(&vtpms_miscdevice); + return rc; + } + + printk(KERN_ALERT "Successfully initialized TPM backend driver.\n"); + + return 0; +} +module_init(tpmback_init); + +static void __exit tpmback_exit(void) +{ + vtpm_release_packets(NULL, 0); + tpmif_xenbus_exit(); + tpmif_interface_exit(); + misc_deregister(&vtpms_miscdevice); +} +module_exit(tpmback_exit) + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/tpmback/Makefile +++ linux-ec2-2.6.31/drivers/xen/tpmback/Makefile @@ -0,0 +1,4 @@ + +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmbk.o + +tpmbk-y += tpmback.o interface.o xenbus.o --- linux-ec2-2.6.31.orig/drivers/xen/tpmback/common.h +++ linux-ec2-2.6.31/drivers/xen/tpmback/common.h @@ -0,0 +1,85 @@ +/****************************************************************************** + * drivers/xen/tpmback/common.h + */ + +#ifndef __TPM__BACKEND__COMMON_H__ +#define __TPM__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DPRINTK(_f, _a...) \ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +struct backend_info; + +typedef struct tpmif_st { + struct list_head tpmif_list; + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + + /* Physical parameters of the comms window. */ + unsigned int irq; + + /* The shared rings and indexes. */ + tpmif_tx_interface_t *tx; + struct vm_struct *tx_area; + + /* Miscellaneous private stuff. 
*/ + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + int active; + + struct tpmif_st *hash_next; + struct list_head list; /* scheduling list */ + atomic_t refcnt; + + struct backend_info *bi; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; + struct page **mmap_pages; + + char devname[20]; +} tpmif_t; + +void tpmif_disconnect_complete(tpmif_t * tpmif); +tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi); +int tpmif_interface_init(void); +void tpmif_interface_exit(void); +void tpmif_schedule_work(tpmif_t * tpmif); +void tpmif_deschedule_work(tpmif_t * tpmif); +int tpmif_xenbus_init(void); +void tpmif_xenbus_exit(void); +int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); +irqreturn_t tpmif_be_int(int irq, void *dev_id); + +long int tpmback_get_instance(struct backend_info *bi); + +int vtpm_release_packets(tpmif_t * tpmif, int send_msgs); + + +#define tpmif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define tpmif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + tpmif_disconnect_complete(_b); \ + } while (0) + +extern int num_frontends; + +static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx) +{ + return (unsigned long)pfn_to_kaddr(page_to_pfn(t->mmap_pages[idx])); +} + +#endif /* __TPMIF__BACKEND__COMMON_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/evtchn/evtchn.c +++ linux-ec2-2.6.31/drivers/xen/evtchn/evtchn.c @@ -0,0 +1,562 @@ +/****************************************************************************** + * evtchn.c + * + * Driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004-2005, K A Fraser + * Multi-process extensions Copyright (c) 2004, Steven Smith + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct per_user_data { + /* Notification ring, accessed via /dev/xen/evtchn. 
*/ +#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) + evtchn_port_t *ring; + unsigned int ring_cons, ring_prod, ring_overflow; + struct mutex ring_cons_mutex; /* protect against concurrent readers */ + + /* Processes wait on this queue when ring is empty. */ + wait_queue_head_t evtchn_wait; + struct fasync_struct *evtchn_async_queue; + + int bind_cpu; + int nr_event_wrong_delivery; +}; + +/* Who's bound to each port? */ +static struct per_user_data *port_user[NR_EVENT_CHANNELS]; +static spinlock_t port_user_lock; + +void evtchn_device_upcall(int port) +{ + struct per_user_data *u; + + spin_lock(&port_user_lock); + + mask_evtchn(port); + clear_evtchn(port); + + if ((u = port_user[port]) != NULL) { + if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; + wmb(); /* Ensure ring contents visible */ + if (u->ring_cons == u->ring_prod++) { + wake_up_interruptible(&u->evtchn_wait); + kill_fasync(&u->evtchn_async_queue, + SIGIO, POLL_IN); + } + } else { + u->ring_overflow = 1; + } + } + + spin_unlock(&port_user_lock); +} + +static void evtchn_check_wrong_delivery(struct per_user_data *u) +{ + evtchn_port_t port; + unsigned int current_cpu = smp_processor_id(); + + /* Delivered to correct CPU? All is good. */ + if (u->bind_cpu == current_cpu) { + u->nr_event_wrong_delivery = 0; + return; + } + + /* Tolerate up to 100 consecutive misdeliveries. */ + if (++u->nr_event_wrong_delivery < 100) + return; + + spin_lock_irq(&port_user_lock); + + for (port = 0; port < NR_EVENT_CHANNELS; port++) + if (port_user[port] == u) + rebind_evtchn_to_cpu(port, current_cpu); + + u->bind_cpu = current_cpu; + u->nr_event_wrong_delivery = 0; + + spin_unlock_irq(&port_user_lock); +} + +static ssize_t evtchn_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int rc; + unsigned int c, p, bytes1 = 0, bytes2 = 0; + struct per_user_data *u = file->private_data; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + if (count == 0) + return 0; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + for (;;) { + mutex_lock(&u->ring_cons_mutex); + + rc = -EFBIG; + if (u->ring_overflow) + goto unlock_out; + + if ((c = u->ring_cons) != (p = u->ring_prod)) + break; + + mutex_unlock(&u->ring_cons_mutex); + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible( + u->evtchn_wait, u->ring_cons != u->ring_prod); + if (rc) + return rc; + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * + sizeof(evtchn_port_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); + } else { + bytes1 = (p - c) * sizeof(evtchn_port_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. */ + if (bytes1 > count) { + bytes1 = count; + bytes2 = 0; + } else if ((bytes1 + bytes2) > count) { + bytes2 = count - bytes1; + } + + rc = -EFAULT; + rmb(); /* Ensure that we see the port before we copy it. 
*/ + if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || + ((bytes2 != 0) && + copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) + goto unlock_out; + + evtchn_check_wrong_delivery(u); + + u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); + rc = bytes1 + bytes2; + + unlock_out: + mutex_unlock(&u->ring_cons_mutex); + return rc; +} + +static ssize_t evtchn_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int rc, i; + evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + struct per_user_data *u = file->private_data; + + if (kbuf == NULL) + return -ENOMEM; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + rc = 0; + if (count == 0) + goto out; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + rc = -EFAULT; + if (copy_from_user(kbuf, buf, count) != 0) + goto out; + + spin_lock_irq(&port_user_lock); + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) + if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) + unmask_evtchn(kbuf[i]); + spin_unlock_irq(&port_user_lock); + + rc = count; + + out: + free_page((unsigned long)kbuf); + return rc; +} + +static unsigned int next_bind_cpu(cpumask_t map) +{ + static unsigned int bind_cpu; + bind_cpu = next_cpu(bind_cpu, map); + if (bind_cpu >= NR_CPUS) + bind_cpu = first_cpu(map); + return bind_cpu; +} + +static void evtchn_bind_to_user(struct per_user_data *u, int port) +{ + spin_lock_irq(&port_user_lock); + + BUG_ON(port_user[port] != NULL); + port_user[port] = u; + + if (u->bind_cpu == -1) + u->bind_cpu = next_bind_cpu(cpu_online_map); + + rebind_evtchn_to_cpu(port, u->bind_cpu); + + unmask_evtchn(port); + + spin_unlock_irq(&port_user_lock); +} + +static long evtchn_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + int rc; + struct per_user_data *u = file->private_data; + void __user *uarg = (void __user *) arg; + + switch (cmd) { + case IOCTL_EVTCHN_BIND_VIRQ: { + struct ioctl_evtchn_bind_virq bind; + struct evtchn_bind_virq bind_virq; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_virq.virq = bind.virq; + bind_virq.vcpu = 0; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (rc != 0) + break; + + rc = bind_virq.port; + evtchn_bind_to_user(u, rc); + break; + } + + case IOCTL_EVTCHN_BIND_INTERDOMAIN: { + struct ioctl_evtchn_bind_interdomain bind; + struct evtchn_bind_interdomain bind_interdomain; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_interdomain.remote_dom = bind.remote_domain; + bind_interdomain.remote_port = bind.remote_port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (rc != 0) + break; + + rc = bind_interdomain.local_port; + evtchn_bind_to_user(u, rc); + break; + } + + case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { + struct ioctl_evtchn_bind_unbound_port bind; + struct evtchn_alloc_unbound alloc_unbound; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = bind.remote_domain; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (rc != 0) + break; + + rc = alloc_unbound.port; + evtchn_bind_to_user(u, rc); + break; + } + + case IOCTL_EVTCHN_UNBIND: { + struct ioctl_evtchn_unbind unbind; + struct evtchn_close close; + int ret; + + rc = -EFAULT; + if (copy_from_user(&unbind, uarg, sizeof(unbind))) + break; + + rc = -EINVAL; + if (unbind.port >= NR_EVENT_CHANNELS) + 
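/* rc is still -EINVAL: reject out-of-range ports. */ +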
break; + + spin_lock_irq(&port_user_lock); + + rc = -ENOTCONN; + if (port_user[unbind.port] != u) { + spin_unlock_irq(&port_user_lock); + break; + } + + port_user[unbind.port] = NULL; + mask_evtchn(unbind.port); + rebind_evtchn_to_cpu(unbind.port, 0); + + spin_unlock_irq(&port_user_lock); + + close.port = unbind.port; + ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + BUG_ON(ret); + + rc = 0; + break; + } + + case IOCTL_EVTCHN_NOTIFY: { + struct ioctl_evtchn_notify notify; + + rc = -EFAULT; + if (copy_from_user(&notify, uarg, sizeof(notify))) + break; + + if (notify.port >= NR_EVENT_CHANNELS) { + rc = -EINVAL; + } else if (port_user[notify.port] != u) { + rc = -ENOTCONN; + } else { + notify_remote_via_evtchn(notify.port); + rc = 0; + } + break; + } + + case IOCTL_EVTCHN_RESET: { + /* Initialise the ring to empty. Clear errors. */ + mutex_lock(&u->ring_cons_mutex); + spin_lock_irq(&port_user_lock); + u->ring_cons = u->ring_prod = u->ring_overflow = 0; + spin_unlock_irq(&port_user_lock); + mutex_unlock(&u->ring_cons_mutex); + rc = 0; + break; + } + + default: + rc = -ENOSYS; + break; + } + + return rc; +} + +static unsigned int evtchn_poll(struct file *file, poll_table *wait) +{ + unsigned int mask = POLLOUT | POLLWRNORM; + struct per_user_data *u = file->private_data; + + poll_wait(file, &u->evtchn_wait, wait); + if (u->ring_cons != u->ring_prod) + mask |= POLLIN | POLLRDNORM; + if (u->ring_overflow) + mask = POLLERR; + return mask; +} + +static int evtchn_fasync(int fd, struct file *filp, int on) +{ + struct per_user_data *u = filp->private_data; + return fasync_helper(fd, filp, on, &u->evtchn_async_queue); +} + +static int evtchn_open(struct inode *inode, struct file *filp) +{ + struct per_user_data *u; + + if ((u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL) + return -ENOMEM; + + memset(u, 0, sizeof(*u)); + init_waitqueue_head(&u->evtchn_wait); + + u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + if (u->ring == NULL) { + kfree(u); + return -ENOMEM; + } + + mutex_init(&u->ring_cons_mutex); + + filp->private_data = u; + + u->bind_cpu = -1; + + return 0; +} + +static int evtchn_release(struct inode *inode, struct file *filp) +{ + int i; + struct per_user_data *u = filp->private_data; + struct evtchn_close close; + + spin_lock_irq(&port_user_lock); + + free_page((unsigned long)u->ring); + + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + int ret; + if (port_user[i] != u) + continue; + + port_user[i] = NULL; + mask_evtchn(i); + rebind_evtchn_to_cpu(i, 0); + + close.port = i; + ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + BUG_ON(ret); + } + + spin_unlock_irq(&port_user_lock); + + kfree(u); + + return 0; +} + +static const struct file_operations evtchn_fops = { + .owner = THIS_MODULE, + .read = evtchn_read, + .write = evtchn_write, + .unlocked_ioctl = evtchn_ioctl, + .poll = evtchn_poll, + .fasync = evtchn_fasync, + .open = evtchn_open, + .release = evtchn_release, +}; + +static struct miscdevice evtchn_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "evtchn", + .fops = &evtchn_fops, +}; + +static int __cpuinit evtchn_cpu_notify(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + cpumask_t map = cpu_online_map; + int i, j, newcpu; + struct per_user_data *u; + + switch (action) { + case CPU_DOWN_PREPARE: + cpu_clear(hotcpu, map); + spin_lock_irq(&port_user_lock); + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + u = port_user[i]; + if ((u == NULL) || (u->bind_cpu != hotcpu)) + continue; + newcpu =
next_bind_cpu(map); + for (j = i; j < NR_EVENT_CHANNELS; j++) + if (port_user[j] == u) + rebind_evtchn_to_cpu(j, newcpu); + u->bind_cpu = newcpu; + } + spin_unlock_irq(&port_user_lock); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata evtchn_cpu_nfb = { + .notifier_call = evtchn_cpu_notify +}; + +static int __init evtchn_init(void) +{ + int err; + + if (!is_running_on_xen()) + return -ENODEV; + + spin_lock_init(&port_user_lock); + memset(port_user, 0, sizeof(port_user)); + + /* Create '/dev/misc/evtchn'. */ + err = misc_register(&evtchn_miscdev); + if (err != 0) { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + register_cpu_notifier(&evtchn_cpu_nfb); + + printk("Event-channel device installed.\n"); + + return 0; +} + +static void __exit evtchn_cleanup(void) +{ + misc_deregister(&evtchn_miscdev); + unregister_cpu_notifier(&evtchn_cpu_nfb); +} + +module_init(evtchn_init); +module_exit(evtchn_cleanup); + +MODULE_LICENSE("Dual BSD/GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/evtchn/Makefile +++ linux-ec2-2.6.31/drivers/xen/evtchn/Makefile @@ -0,0 +1,2 @@ + +obj-y := evtchn.o --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel_fwd.c +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel_fwd.c @@ -0,0 +1,425 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include "accel.h" +#include "accel_cuckoo_hash.h" +#include "accel_util.h" +#include "accel_solarflare.h" + +#include "driverlink_api.h" + +#include +#include +#include + +/* State stored in the forward table */ +struct fwd_struct { + struct list_head link; /* Forms list */ + void * context; + __u8 valid; + __u8 mac[ETH_ALEN]; +}; + +/* Max value we support */ +#define NUM_FWDS_BITS 8 +#define NUM_FWDS (1 << NUM_FWDS_BITS) +#define FWD_MASK (NUM_FWDS - 1) + +struct port_fwd { + /* Make a list */ + struct list_head link; + /* Hash table to store the fwd_structs */ + cuckoo_hash_table fwd_hash_table; + /* The array of fwd_structs */ + struct fwd_struct *fwd_array; + /* Linked list of entries in use. */ + struct list_head fwd_list; + /* Could do something clever with a reader/writer lock. */ + spinlock_t fwd_lock; + /* Make find_free_entry() a bit faster by caching this */ + int last_free_index; +}; + +/* + * This is unlocked as it's only called from dl probe and remove, + * which are themselves synchronised. 
Could get rid of it entirely as + * it's never iterated, but useful for debug + */ +static struct list_head port_fwds; + + +/* Search the fwd_array for an unused entry */ +static int fwd_find_free_entry(struct port_fwd *fwd_set) +{ + int index = fwd_set->last_free_index; + + do { + if (!fwd_set->fwd_array[index].valid) { + fwd_set->last_free_index = index; + return index; + } + index++; + if (index >= NUM_FWDS) + index = 0; + } while (index != fwd_set->last_free_index); + + return -ENOMEM; +} + + +/* Look up a MAC in the hash table. Caller should hold table lock. */ +static inline struct fwd_struct *fwd_find_entry(const __u8 *mac, + struct port_fwd *fwd_set) +{ + cuckoo_hash_value value; + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); + + if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table, + (cuckoo_hash_key *)(&key), + &value)) { + struct fwd_struct *fwd = &fwd_set->fwd_array[value]; + DPRINTK_ON(memcmp(fwd->mac, mac, ETH_ALEN) != 0); + return fwd; + } + + return NULL; +} + + +/* Initialise each nic port's forwarding table */ +void *netback_accel_init_fwd_port(void) +{ + struct port_fwd *fwd_set; + + fwd_set = kzalloc(sizeof(struct port_fwd), GFP_KERNEL); + if (fwd_set == NULL) { + return NULL; + } + + spin_lock_init(&fwd_set->fwd_lock); + + fwd_set->fwd_array = kzalloc(sizeof (struct fwd_struct) * NUM_FWDS, + GFP_KERNEL); + if (fwd_set->fwd_array == NULL) { + kfree(fwd_set); + return NULL; + } + + if (cuckoo_hash_init(&fwd_set->fwd_hash_table, NUM_FWDS_BITS, 8) != 0) { + kfree(fwd_set->fwd_array); + kfree(fwd_set); + return NULL; + } + + INIT_LIST_HEAD(&fwd_set->fwd_list); + + list_add(&fwd_set->link, &port_fwds); + + return fwd_set; +} + + +void netback_accel_shutdown_fwd_port(void *fwd_priv) +{ + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + + BUG_ON(fwd_priv == NULL); + + BUG_ON(list_empty(&port_fwds)); + list_del(&fwd_set->link); + + BUG_ON(!list_empty(&fwd_set->fwd_list)); + + cuckoo_hash_destroy(&fwd_set->fwd_hash_table); + kfree(fwd_set->fwd_array); + kfree(fwd_set); +} + + +int netback_accel_init_fwd() +{ + INIT_LIST_HEAD(&port_fwds); + return 0; +} + + +void netback_accel_shutdown_fwd() +{ + BUG_ON(!list_empty(&port_fwds)); +} + + +/* + * Add an entry to the forwarding table. Returns -ENOMEM if no + * space.
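+ * Returns -EEXIST if the MAC address is already in the table.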
+ */ +int netback_accel_fwd_add(const __u8 *mac, void *context, void *fwd_priv) +{ + struct fwd_struct *fwd; + int rc = 0, index; + unsigned long flags; + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); + + BUG_ON(fwd_priv == NULL); + + DPRINTK("Adding mac %s\n", print_mac(buf, mac)); + + spin_lock_irqsave(&fwd_set->fwd_lock, flags); + + if ((rc = fwd_find_free_entry(fwd_set)) < 0 ) { + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); + return rc; + } + + index = rc; + + /* Shouldn't already be in the table */ + if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table, + (cuckoo_hash_key *)(&key), &rc) != 0) { + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); + EPRINTK("MAC address %s already accelerated.\n", + print_mac(buf, mac)); + return -EEXIST; + } + + if ((rc = cuckoo_hash_add(&fwd_set->fwd_hash_table, + (cuckoo_hash_key *)(&key), index, 1)) == 0) { + fwd = &fwd_set->fwd_array[index]; + fwd->valid = 1; + fwd->context = context; + memcpy(fwd->mac, mac, ETH_ALEN); + list_add(&fwd->link, &fwd_set->fwd_list); + NETBACK_ACCEL_STATS_OP(global_stats.num_fwds++); + } + + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); + + /* + * No need to tell frontend that this mac address is local - + * it should auto-discover through packets on fastpath what is + * local and what is not, and just being on same server + * doesn't make it local (it could be on a different + * bridge) + */ + + return rc; +} + + +/* remove an entry from the forwarding tables. */ +void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv) +{ + struct fwd_struct *fwd; + unsigned long flags; + cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); + + DPRINTK("Removing mac %s\n", print_mac(buf, mac)); + + BUG_ON(fwd_priv == NULL); + + spin_lock_irqsave(&fwd_set->fwd_lock, flags); + + fwd = fwd_find_entry(mac, fwd_set); + if (fwd != NULL) { + BUG_ON(list_empty(&fwd_set->fwd_list)); + list_del(&fwd->link); + + fwd->valid = 0; + cuckoo_hash_remove(&fwd_set->fwd_hash_table, + (cuckoo_hash_key *)(&key)); + NETBACK_ACCEL_STATS_OP(global_stats.num_fwds--); + } + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); + + /* + * No need to tell frontend that this is no longer present - + * the frontend is currently only interested in remote + * addresses and it works these out (mostly) by itself + */ +} + + +/* Set the context pointer for a hash table entry. */ +int netback_accel_fwd_set_context(const __u8 *mac, void *context, + void *fwd_priv) +{ + struct fwd_struct *fwd; + unsigned long flags; + int rc = -ENOENT; + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + + BUG_ON(fwd_priv == NULL); + + spin_lock_irqsave(&fwd_set->fwd_lock, flags); + fwd = fwd_find_entry(mac, fwd_set); + if (fwd != NULL) { + fwd->context = context; + rc = 0; + } + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); + return rc; +} + + +/************************************************************************** + * Process a received packet + **************************************************************************/ + +/* + * Returns whether or not we have a match in our forward table for + * this skb.
Must be called with appropriate fwd_lock already held + */ +static struct netback_accel *for_a_vnic(struct netback_pkt_buf *skb, + struct port_fwd *fwd_set) +{ + struct fwd_struct *fwd; + struct netback_accel *retval = NULL; + + fwd = fwd_find_entry(skb->mac.raw, fwd_set); + if (fwd != NULL) + retval = fwd->context; + return retval; +} + + +static inline int packet_is_arp_reply(struct sk_buff *skb) +{ + return skb->protocol == ntohs(ETH_P_ARP) + && arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY); +} + + +static inline void hdr_to_filt(struct ethhdr *ethhdr, struct iphdr *ip, + struct netback_accel_filter_spec *spec) +{ + spec->proto = ip->protocol; + spec->destip_be = ip->daddr; + memcpy(spec->mac, ethhdr->h_source, ETH_ALEN); + + if (ip->protocol == IPPROTO_TCP) { + struct tcphdr *tcp = (struct tcphdr *)((char *)ip + 4 * ip->ihl); + spec->destport_be = tcp->dest; + } else { + struct udphdr *udp = (struct udphdr *)((char *)ip + 4 * ip->ihl); + EPRINTK_ON(ip->protocol != IPPROTO_UDP); + spec->destport_be = udp->dest; + } +} + + +static inline int netback_accel_can_filter(struct netback_pkt_buf *skb) +{ + return (skb->protocol == htons(ETH_P_IP) && + ((skb->nh.iph->protocol == IPPROTO_TCP) || + (skb->nh.iph->protocol == IPPROTO_UDP))); +} + + +static inline void netback_accel_filter_packet(struct netback_accel *bend, + struct netback_pkt_buf *skb) +{ + struct netback_accel_filter_spec fs; + struct ethhdr *eh = (struct ethhdr *)(skb->mac.raw); + + hdr_to_filt(eh, skb->nh.iph, &fs); + + netback_accel_filter_check_add(bend, &fs); +} + + +/* + * Receive a packet and do something appropriate with it. Return true + * to take exclusive ownership of the packet. This is verging on + * solarflare specific + */ +void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv) +{ + struct netback_accel *bend; + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + unsigned long flags; + + BUG_ON(fwd_priv == NULL); + + /* Checking for bcast is cheaper so do that first */ + if (is_broadcast_ether_addr(skb->mac.raw)) { + /* pass through the slow path by not claiming ownership */ + return; + } else if (is_multicast_ether_addr(skb->mac.raw)) { + /* pass through the slow path by not claiming ownership */ + return; + } else { + /* It is unicast */ + spin_lock_irqsave(&fwd_set->fwd_lock, flags); + /* We insert filter to pass it off to a VNIC */ + if ((bend = for_a_vnic(skb, fwd_set)) != NULL) + if (netback_accel_can_filter(skb)) + netback_accel_filter_packet(bend, skb); + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); + } + return; +} + + +void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv) +{ + __u8 *mac; + unsigned long flags; + struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + struct fwd_struct *fwd; + + BUG_ON(fwd_priv == NULL); + + if (is_broadcast_ether_addr(skb_mac_header(skb)) + && packet_is_arp_reply(skb)) { + DECLARE_MAC_BUF(buf); + + /* + * update our fast path forwarding to reflect this + * gratuitous ARP + */ + mac = skb_mac_header(skb)+ETH_ALEN; + + DPRINTK("%s: found gratuitous ARP for %s\n", + __FUNCTION__, print_mac(buf, mac)); + + spin_lock_irqsave(&fwd_set->fwd_lock, flags); + /* + * Might not be local, but let's tell them all it is, + * and they can restore the fastpath if they continue + * to get packets that way + */ + list_for_each_entry(fwd, &fwd_set->fwd_list, link) { + struct netback_accel *bend = fwd->context; + if (bend != NULL) + netback_accel_msg_tx_new_localmac(bend, mac); + } + + spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); + } + return; +} 
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel.c +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel.c @@ -0,0 +1,147 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include "accel.h" +#include "accel_msg_iface.h" +#include "accel_solarflare.h" + +#include + +#ifdef EFX_GCOV +#include "gcov.h" +#endif + +static int netback_accel_netdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *net_dev = (struct net_device *)ptr; + struct netback_accel *bend; + + if ((event == NETDEV_UP) || + (event == NETDEV_DOWN) || + (event == NETDEV_CHANGE)) { + mutex_lock(&bend_list_mutex); + bend = bend_list; + while (bend != NULL) { + mutex_lock(&bend->bend_mutex); + /* + * This happens when the shared pages have + * been unmapped, but the bend has not yet been + * removed from the list + */ + if (bend->shared_page == NULL) + goto next; + + if (bend->net_dev->ifindex == net_dev->ifindex) { + int ok; + if (event == NETDEV_CHANGE) + ok = (netif_carrier_ok(net_dev) && + (net_dev->flags & IFF_UP)); + else + ok = (netif_carrier_ok(net_dev) && + (event == NETDEV_UP)); + netback_accel_set_interface_state(bend, ok); + } + + next: + mutex_unlock(&bend->bend_mutex); + bend = bend->next_bend; + } + mutex_unlock(&bend_list_mutex); + } + + return NOTIFY_DONE; +} + + +static struct notifier_block netback_accel_netdev_notifier = { + .notifier_call = netback_accel_netdev_event, +}; + + +unsigned sfc_netback_max_pages = NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES; +module_param_named(max_pages, sfc_netback_max_pages, uint, 0644); +MODULE_PARM_DESC(max_pages, + "The number of buffer pages to enforce on each guest"); + +/* Initialise subsystems needed for the accelerated fast path */ +static int __init netback_accel_init(void) +{ + int rc = 0; + +#ifdef EFX_GCOV + gcov_provider_init(THIS_MODULE); +#endif + + rc = netback_accel_init_fwd(); + if (rc != 0) + goto fail0; + + netback_accel_debugfs_init(); + + rc = netback_accel_sf_init(); + if (rc != 0) + goto fail1; + + rc = register_netdevice_notifier + (&netback_accel_netdev_notifier); + if (rc != 0) + goto fail2; + + return 0; + + fail2: + netback_accel_sf_shutdown(); + fail1: + netback_accel_debugfs_fini(); + netback_accel_shutdown_fwd(); + fail0: +#ifdef EFX_GCOV + gcov_provider_fini(THIS_MODULE); +#endif + return rc; +} + +module_init(netback_accel_init); + +static void __exit netback_accel_exit(void) +{ + unregister_netdevice_notifier(&netback_accel_netdev_notifier); + + netback_accel_sf_shutdown(); + +
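/* Tear down any bends still in existence. */ +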
netback_accel_shutdown_bends(); + + netback_accel_debugfs_fini(); + + netback_accel_shutdown_fwd(); + +#ifdef EFX_GCOV + gcov_provider_fini(THIS_MODULE); +#endif +} + +module_exit(netback_accel_exit); + +MODULE_LICENSE("GPL"); --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel_debugfs.c +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel_debugfs.c @@ -0,0 +1,148 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + +#include "accel.h" + +#if defined(CONFIG_DEBUG_FS) +static struct dentry *sfc_debugfs_root = NULL; +#endif + +#if NETBACK_ACCEL_STATS +struct netback_accel_global_stats global_stats; +#if defined(CONFIG_DEBUG_FS) +static struct netback_accel_global_dbfs global_dbfs; +#endif +#endif + +void netback_accel_debugfs_init(void) +{ +#if defined(CONFIG_DEBUG_FS) + sfc_debugfs_root = debugfs_create_dir("sfc_netback", NULL); + if (sfc_debugfs_root == NULL) + return; + + global_dbfs.num_fwds = debugfs_create_u32 + ("num_fwds", S_IRUSR | S_IRGRP | S_IROTH, + sfc_debugfs_root, &global_stats.num_fwds); + global_dbfs.dl_tx_packets = debugfs_create_u64 + ("dl_tx_packets", S_IRUSR | S_IRGRP | S_IROTH, + sfc_debugfs_root, &global_stats.dl_tx_packets); + global_dbfs.dl_rx_packets = debugfs_create_u64 + ("dl_rx_packets", S_IRUSR | S_IRGRP | S_IROTH, + sfc_debugfs_root, &global_stats.dl_rx_packets); + global_dbfs.dl_tx_bad_packets = debugfs_create_u64 + ("dl_tx_bad_packets", S_IRUSR | S_IRGRP | S_IROTH, + sfc_debugfs_root, &global_stats.dl_tx_bad_packets); +#endif +} + + +void netback_accel_debugfs_fini(void) +{ +#if defined(CONFIG_DEBUG_FS) + debugfs_remove(global_dbfs.num_fwds); + debugfs_remove(global_dbfs.dl_tx_packets); + debugfs_remove(global_dbfs.dl_rx_packets); + debugfs_remove(global_dbfs.dl_tx_bad_packets); + + debugfs_remove(sfc_debugfs_root); +#endif +} + + +int netback_accel_debugfs_create(struct netback_accel *bend) +{ +#if defined(CONFIG_DEBUG_FS) + /* Smallest length is 7 (vif0.0\n) */ + int length = 7, temp; + + if (sfc_debugfs_root == NULL) + return -ENOENT; + + /* Work out length of string representation of far_end and vif_num */ + temp = bend->far_end; + while (temp > 9) { + length++; + temp = temp / 10; + } + temp = bend->vif_num; + while (temp > 9) { + length++; + temp = temp / 10; + } + + bend->dbfs_dir_name = kmalloc(length, GFP_KERNEL); + if (bend->dbfs_dir_name == NULL) + return -ENOMEM; + sprintf(bend->dbfs_dir_name, "vif%d.%d", bend->far_end, bend->vif_num); + + bend->dbfs_dir = debugfs_create_dir(bend->dbfs_dir_name, + 
sfc_debugfs_root); + if (bend->dbfs_dir == NULL) { + kfree(bend->dbfs_dir_name); + return -ENOMEM; + } + +#if NETBACK_ACCEL_STATS + bend->dbfs.evq_wakeups = debugfs_create_u64 + ("evq_wakeups", S_IRUSR | S_IRGRP | S_IROTH, + bend->dbfs_dir, &bend->stats.evq_wakeups); + bend->dbfs.evq_timeouts = debugfs_create_u64 + ("evq_timeouts", S_IRUSR | S_IRGRP | S_IROTH, + bend->dbfs_dir, &bend->stats.evq_timeouts); + bend->dbfs.num_filters = debugfs_create_u32 + ("num_filters", S_IRUSR | S_IRGRP | S_IROTH, + bend->dbfs_dir, &bend->stats.num_filters); + bend->dbfs.num_buffer_pages = debugfs_create_u32 + ("num_buffer_pages", S_IRUSR | S_IRGRP | S_IROTH, + bend->dbfs_dir, &bend->stats.num_buffer_pages); +#endif +#endif + return 0; +} + + +int netback_accel_debugfs_remove(struct netback_accel *bend) +{ +#if defined(CONFIG_DEBUG_FS) + if (bend->dbfs_dir != NULL) { +#if NETBACK_ACCEL_STATS + debugfs_remove(bend->dbfs.evq_wakeups); + debugfs_remove(bend->dbfs.evq_timeouts); + debugfs_remove(bend->dbfs.num_filters); + debugfs_remove(bend->dbfs.num_buffer_pages); +#endif + debugfs_remove(bend->dbfs_dir); + } + + if (bend->dbfs_dir_name) + kfree(bend->dbfs_dir_name); +#endif + return 0; +} + + --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel_xenbus.c +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel_xenbus.c @@ -0,0 +1,830 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + +/* drivers/xen/netback/common.h */ +#include "common.h" + +#include "accel.h" +#include "accel_solarflare.h" +#include "accel_util.h" + +#define NODENAME_PATH_FMT "backend/vif/%d/%d" + +#define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \ + ((struct backend_info *)dev_get_drvdata(&(_dev)->dev))->netback_accel_priv + +/* List of all the bends currently in existence. */ +struct netback_accel *bend_list = NULL; +DEFINE_MUTEX(bend_list_mutex); + +/* Put in bend_list. Must hold bend_list_mutex */ +static void link_bend(struct netback_accel *bend) +{ + bend->next_bend = bend_list; + bend_list = bend; +} + +/* Remove from bend_list, Must hold bend_list_mutex */ +static void unlink_bend(struct netback_accel *bend) +{ + struct netback_accel *tmp = bend_list; + struct netback_accel *prev = NULL; + while (tmp != NULL) { + if (tmp == bend) { + if (prev != NULL) + prev->next_bend = bend->next_bend; + else + bend_list = bend->next_bend; + return; + } + prev = tmp; + tmp = tmp->next_bend; + } +} + + +/* Demultiplex a message IRQ from the frontend driver. 
*/ +static irqreturn_t msgirq_from_frontend(int irq, void *context) +{ + struct xenbus_device *dev = context; + struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev); + VPRINTK("irq %d from device %s\n", irq, dev->nodename); + schedule_work(&bend->handle_msg); + return IRQ_HANDLED; +} + + +/* + * Demultiplex an IRQ from the frontend driver. This is never used + * functionally, but we need it to pass to the bind function, and may + * get called spuriously + */ +static irqreturn_t netirq_from_frontend(int irq, void *context) +{ + VPRINTK("netirq %d from device %s\n", irq, + ((struct xenbus_device *)context)->nodename); + + return IRQ_HANDLED; +} + + +/* Read the limits values of the xenbus structure. */ +static +void cfg_hw_quotas(struct xenbus_device *dev, struct netback_accel *bend) +{ + int err = xenbus_gather + (XBT_NIL, dev->nodename, + "limits/max-filters", "%d", &bend->quotas.max_filters, + "limits/max-buf-pages", "%d", &bend->quotas.max_buf_pages, + "limits/max-mcasts", "%d", &bend->quotas.max_mcasts, + NULL); + if (err) { + /* + * TODO what if they have previously been set by the + * user? This will overwrite with defaults. Maybe + * not what we want to do, but useful in startup + * case + */ + DPRINTK("Failed to read quotas from xenbus, using defaults\n"); + bend->quotas.max_filters = NETBACK_ACCEL_DEFAULT_MAX_FILTERS; + bend->quotas.max_buf_pages = sfc_netback_max_pages; + bend->quotas.max_mcasts = NETBACK_ACCEL_DEFAULT_MAX_MCASTS; + } + + return; +} + + +static void bend_config_accel_change(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct netback_accel *bend; + + bend = container_of(watch, struct netback_accel, config_accel_watch); + + mutex_lock(&bend->bend_mutex); + if (bend->config_accel_watch.node != NULL) { + struct xenbus_device *dev = + (struct xenbus_device *)bend->hdev_data; + DPRINTK("Watch matched, got dev %p otherend %p\n", + dev, dev->otherend); + if(!xenbus_exists(XBT_NIL, watch->node, "")) { + DPRINTK("Ignoring watch as otherend seems invalid\n"); + goto out; + } + + cfg_hw_quotas(dev, bend); + } + out: + mutex_unlock(&bend->bend_mutex); + return; +} + + +/* + * Setup watch on "limits" in the backend vif info to know when + * configuration has been set + */ +static int setup_config_accel_watch(struct xenbus_device *dev, + struct netback_accel *bend) +{ + int err; + + VPRINTK("Setting watch on %s/%s\n", dev->nodename, "limits"); + + err = xenbus_watch_path2(dev, dev->nodename, "limits", + &bend->config_accel_watch, + bend_config_accel_change); + + if (err) { + EPRINTK("%s: Failed to register xenbus watch: %d\n", + __FUNCTION__, err); + bend->config_accel_watch.node = NULL; + return err; + } + return 0; +} + + +static int +cfg_frontend_info(struct xenbus_device *dev, struct netback_accel *bend, + int *grants) +{ + /* Get some info from xenbus on the event channel and shmem grant */ + int err = xenbus_gather(XBT_NIL, dev->otherend, + "accel-msg-channel", "%u", &bend->msg_channel, + "accel-ctrl-page", "%d", &(grants[0]), + "accel-msg-page", "%d", &(grants[1]), + "accel-net-channel", "%u", &bend->net_channel, + NULL); + if (err) + EPRINTK("failed to read event channels or shmem grant: %d\n", + err); + else + DPRINTK("got event chan %d and net chan %d from frontend\n", + bend->msg_channel, bend->net_channel); + return err; +} + + +/* Setup all the comms needed to chat with the front end driver */ +static int setup_vnic(struct xenbus_device *dev) +{ + struct netback_accel *bend; + int grants[2], err, msgs_per_queue; + + bend = 
NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev); + + err = cfg_frontend_info(dev, bend, grants); + if (err) + goto fail1; + + /* + * If we get here, the frontend is Connected and the configuration + * options are available. All is well. + */ + + /* Get the hardware quotas for the VNIC in question. */ + cfg_hw_quotas(dev, bend); + + /* Set up the deferred work handlers */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) + INIT_WORK(&bend->handle_msg, + netback_accel_msg_rx_handler); +#else + INIT_WORK(&bend->handle_msg, + netback_accel_msg_rx_handler, + (void*)bend); +#endif + + /* Request the frontend mac */ + err = net_accel_xen_net_read_mac(dev, bend->mac); + if (err) + goto fail2; + + /* Set up the shared page. */ + bend->shared_page = net_accel_map_grants_contig(dev, grants, 2, + &bend->sh_pages_unmap); + + if (bend->shared_page == NULL) { + EPRINTK("failed to map shared page for %s\n", dev->otherend); + err = -ENOMEM; + goto fail2; + } + + /* Initialise the shared page(s) used for comms */ + net_accel_msg_init_page(bend->shared_page, PAGE_SIZE, + (bend->net_dev->flags & IFF_UP) && + (netif_carrier_ok(bend->net_dev))); + + msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg); + + net_accel_msg_init_queue + (&bend->to_domU, &bend->shared_page->queue0, + (struct net_accel_msg *)((__u8*)bend->shared_page + PAGE_SIZE), + msgs_per_queue); + + net_accel_msg_init_queue + (&bend->from_domU, &bend->shared_page->queue1, + (struct net_accel_msg *)((__u8*)bend->shared_page + + (3 * PAGE_SIZE / 2)), + msgs_per_queue); + + /* Bind the message event channel to a handler + * + * Note that we will probably get a spurious interrupt when we + * do this, so it must not be done until we have set up + * everything we need to handle it. + */ + err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id, + bend->msg_channel, + msgirq_from_frontend, + 0, + "netback_accel", + dev); + if (err < 0) { + EPRINTK("failed to bind event channel: %d\n", err); + goto fail3; + } + else + bend->msg_channel_irq = err; + + /* TODO: No need to bind this evtchn to an irq. */ + err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id, + bend->net_channel, + netirq_from_frontend, + 0, + "netback_accel", + dev); + if (err < 0) { + EPRINTK("failed to bind net channel: %d\n", err); + goto fail4; + } + else + bend->net_channel_irq = err; + + /* + * Grab ourselves an entry in the forwarding hash table. We do + * this now so we don't have the embarrassment of sorting out + * an allocation failure while in IRQ context. Because we pass NULL as + * the context, the actual hash lookup will succeed for this + * NIC, but the check for somewhere to forward to will + * fail. This is necessary to prevent forwarding before + * hardware resources are set up. + */ + err = netback_accel_fwd_add(bend->mac, NULL, bend->fwd_priv); + if (err) { + EPRINTK("failed to add to fwd hash table\n"); + goto fail5; + } + + /* + * Say hello to frontend.
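An aside on the queue setup above: the two grants map a contiguous two-page region, and the two message FIFOs are carved out of the second page. A minimal standalone sketch of that layout arithmetic, assuming a 4 KiB page and a hypothetical 64-byte message struct (the real sizes come from net_accel_msg.h, which is not shown here):

#include <stdio.h>

#define EX_PAGE_SIZE 4096
struct ex_msg { unsigned char payload[64]; }; /* stand-in for struct net_accel_msg */

int main(void)
{
	/* Page 0 holds the shared control structure; page 1 holds both
	 * FIFOs: to_domU data in its first half, from_domU in its second. */
	unsigned long to_domU_off   = EX_PAGE_SIZE;          /* queue0 data */
	unsigned long from_domU_off = 3 * EX_PAGE_SIZE / 2;  /* queue1 data */
	unsigned long msgs_per_queue = (EX_PAGE_SIZE / 2) / sizeof(struct ex_msg);

	printf("to_domU at +%lu, from_domU at +%lu, %lu msgs per queue\n",
	       to_domU_off, from_domU_off, msgs_per_queue);
	return 0;
}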
Important to do this straight after + * obtaining the message queue as otherwise we are vulnerable + * to an evil frontend sending a HELLO-REPLY before we've sent + * the HELLO and confusing us + */ + netback_accel_msg_tx_hello(bend, NET_ACCEL_MSG_VERSION); + return 0; + + fail5: + unbind_from_irqhandler(bend->net_channel_irq, dev); + fail4: + unbind_from_irqhandler(bend->msg_channel_irq, dev); + fail3: + net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap); + bend->shared_page = NULL; + bend->sh_pages_unmap = NULL; + fail2: + fail1: + return err; +} + + +static int read_nicname(struct xenbus_device *dev, struct netback_accel *bend) +{ + int len; + + /* nic name used to select interface used for acceleration */ + bend->nicname = xenbus_read(XBT_NIL, dev->nodename, "accel", &len); + if (IS_ERR(bend->nicname)) + return PTR_ERR(bend->nicname); + + return 0; +} + +static const char *frontend_name = "sfc_netfront"; + +static int publish_frontend_name(struct xenbus_device *dev) +{ + struct xenbus_transaction tr; + int err; + + /* Publish the name of the frontend driver */ + do { + err = xenbus_transaction_start(&tr); + if (err != 0) { + EPRINTK("%s: transaction start failed\n", __FUNCTION__); + return err; + } + err = xenbus_printf(tr, dev->nodename, "accel-frontend", + "%s", frontend_name); + if (err != 0) { + EPRINTK("%s: xenbus_printf failed\n", __FUNCTION__); + xenbus_transaction_end(tr, 1); + return err; + } + err = xenbus_transaction_end(tr, 0); + } while (err == -EAGAIN); + + if (err != 0) { + EPRINTK("failed to end frontend name transaction\n"); + return err; + } + return 0; +} + + +static int unpublish_frontend_name(struct xenbus_device *dev) +{ + struct xenbus_transaction tr; + int err; + + do { + err = xenbus_transaction_start(&tr); + if (err != 0) + break; + err = xenbus_rm(tr, dev->nodename, "accel-frontend"); + if (err != 0) { + xenbus_transaction_end(tr, 1); + break; + } + err = xenbus_transaction_end(tr, 0); + } while (err == -EAGAIN); + + return err; +} + + +static void cleanup_vnic(struct netback_accel *bend) +{ + struct xenbus_device *dev; + + dev = (struct xenbus_device *)bend->hdev_data; + + DPRINTK("%s: bend %p dev %p\n", __FUNCTION__, bend, dev); + + DPRINTK("%s: Remove %p's mac from fwd table...\n", + __FUNCTION__, bend); + netback_accel_fwd_remove(bend->mac, bend->fwd_priv); + + /* Free buffer table allocations */ + netback_accel_remove_buffers(bend); + + DPRINTK("%s: Release hardware resources...\n", __FUNCTION__); + if (bend->accel_shutdown) + bend->accel_shutdown(bend); + + if (bend->net_channel_irq) { + unbind_from_irqhandler(bend->net_channel_irq, dev); + bend->net_channel_irq = 0; + } + + if (bend->msg_channel_irq) { + unbind_from_irqhandler(bend->msg_channel_irq, dev); + bend->msg_channel_irq = 0; + } + + if (bend->sh_pages_unmap) { + DPRINTK("%s: Unmap grants %p\n", __FUNCTION__, + bend->sh_pages_unmap); + net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap); + bend->sh_pages_unmap = NULL; + bend->shared_page = NULL; + } +} + + +/*************************************************************************/ + +/* + * The following code handles accelstate changes between the frontend + * and the backend. It calls setup_vnic and cleanup_vnic in matching + * pairs in response to transitions. 
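The publish_frontend_name() routine above uses the standard xenbus transaction retry idiom: keep restarting the whole transaction while the commit returns -EAGAIN. A minimal userspace sketch of the loop shape, with hypothetical tx_start/tx_write/tx_end stand-ins for the xenbus_transaction_* calls (the stub fails the first commit to exercise the retry path):

#include <stdio.h>

#define EX_EAGAIN 11

static int attempts;
static int tx_start(int *t) { *t = 1; return 0; }
static int tx_write(int t)  { (void)t; return 0; }
/* Fail the first commit to show the retry being taken. */
static int tx_end(int t, int abort) { (void)t; (void)abort; return attempts++ ? 0 : -EX_EAGAIN; }

int main(void)
{
	int t, err;
	do {
		if ((err = tx_start(&t)) != 0)
			break;          /* could not even open a transaction */
		if ((err = tx_write(t)) != 0) {
			tx_end(t, 1);   /* abort on write failure */
			break;
		}
		err = tx_end(t, 0);     /* commit; -EAGAIN means "lost a race, retry" */
	} while (err == -EX_EAGAIN);
	printf("done, err=%d after %d commit attempt(s)\n", err, attempts);
	return 0;
}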
+ * + * Valid state transitions for Dom0 are as follows: + * + * Closed->Init on probe or in response to Init from domU + * Closed->Closing on error/remove + * + * Init->Connected in response to Connected from domU + * Init->Closing on error/remove or in response to Closing from domU + * + * Connected->Closing on error/remove or in response to Closing from domU + * + * Closing->Closed in response to Closed from domU + * + */ + + +static void netback_accel_frontend_changed(struct xenbus_device *dev, + XenbusState frontend_state) +{ + struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev); + XenbusState backend_state; + + DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n", + __FUNCTION__, xenbus_strstate(bend->frontend_state), + xenbus_strstate(frontend_state),dev->nodename, dev->otherend); + + /* + * Ignore duplicate state changes. This can happen if the + * frontend changes state twice in quick succession and the + * first watch fires in the backend after the second + * transition has completed. + */ + if (bend->frontend_state == frontend_state) + return; + + bend->frontend_state = frontend_state; + backend_state = bend->backend_state; + + switch (frontend_state) { + case XenbusStateInitialising: + if (backend_state == XenbusStateClosed && + !bend->removing) + backend_state = XenbusStateInitialising; + break; + + case XenbusStateConnected: + if (backend_state == XenbusStateInitialising) { + if (!bend->vnic_is_setup && + setup_vnic(dev) == 0) { + bend->vnic_is_setup = 1; + backend_state = XenbusStateConnected; + } else { + backend_state = XenbusStateClosing; + } + } + break; + + case XenbusStateInitWait: + case XenbusStateInitialised: + default: + DPRINTK("Unknown state %s (%d) from frontend.\n", + xenbus_strstate(frontend_state), frontend_state); + /* Unknown state. Fall through. */ + case XenbusStateClosing: + if (backend_state != XenbusStateClosed) + backend_state = XenbusStateClosing; + + /* + * The bend will now persist (with watches active) in + * case the frontend comes back again, eg. 
after + * frontend module reload or suspend/resume + */ + + break; + + case XenbusStateUnknown: + case XenbusStateClosed: + if (bend->vnic_is_setup) { + bend->vnic_is_setup = 0; + cleanup_vnic(bend); + } + + if (backend_state == XenbusStateClosing) + backend_state = XenbusStateClosed; + break; + } + + if (backend_state != bend->backend_state) { + DPRINTK("Switching from state %s (%d) to %s (%d)\n", + xenbus_strstate(bend->backend_state), + bend->backend_state, + xenbus_strstate(backend_state), backend_state); + bend->backend_state = backend_state; + net_accel_update_state(dev, backend_state); + } + + wake_up(&bend->state_wait_queue); +} + + +/* accelstate on the frontend's xenbus node has changed */ +static void bend_domu_accel_change(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int state; + struct netback_accel *bend; + + bend = container_of(watch, struct netback_accel, domu_accel_watch); + if (bend->domu_accel_watch.node != NULL) { + struct xenbus_device *dev = + (struct xenbus_device *)bend->hdev_data; + VPRINTK("Watch matched, got dev %p otherend %p\n", + dev, dev->otherend); + /* + * dev->otherend != NULL check to protect against + * watch firing when domain goes away and we haven't + * yet cleaned up + */ + if (!dev->otherend || + !xenbus_exists(XBT_NIL, watch->node, "") || + strncmp(dev->otherend, vec[XS_WATCH_PATH], + strlen(dev->otherend))) { + DPRINTK("Ignoring watch as otherend seems invalid\n"); + return; + } + + mutex_lock(&bend->bend_mutex); + + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", + &state); + netback_accel_frontend_changed(dev, state); + + mutex_unlock(&bend->bend_mutex); + } +} + +/* Setup watch on frontend's accelstate */ +static int setup_domu_accel_watch(struct xenbus_device *dev, + struct netback_accel *bend) +{ + int err; + + VPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate"); + + err = xenbus_watch_path2(dev, dev->otherend, "accelstate", + &bend->domu_accel_watch, + bend_domu_accel_change); + if (err) { + EPRINTK("%s: Failed to register xenbus watch: %d\n", + __FUNCTION__, err); + goto fail; + } + return 0; + fail: + bend->domu_accel_watch.node = NULL; + return err; +} + + +int netback_accel_probe(struct xenbus_device *dev) +{ + struct netback_accel *bend; + struct backend_info *binfo; + int err; + + DPRINTK("%s: passed device %s\n", __FUNCTION__, dev->nodename); + + /* Allocate structure to store all our state... */ + bend = kzalloc(sizeof(struct netback_accel), GFP_KERNEL); + if (bend == NULL) { + DPRINTK("%s: no memory for bend\n", __FUNCTION__); + return -ENOMEM; + } + + mutex_init(&bend->bend_mutex); + + mutex_lock(&bend->bend_mutex); + + /* ...and store it where we can get at it */ + binfo = dev_get_drvdata(&dev->dev); + binfo->netback_accel_priv = bend; + /* And vice-versa */ + bend->hdev_data = dev; + + DPRINTK("%s: Adding bend %p to list\n", __FUNCTION__, bend); + + init_waitqueue_head(&bend->state_wait_queue); + bend->vnic_is_setup = 0; + bend->frontend_state = XenbusStateUnknown; + bend->backend_state = XenbusStateClosed; + bend->removing = 0; + + sscanf(dev->nodename, NODENAME_PATH_FMT, &bend->far_end, + &bend->vif_num); + + err = read_nicname(dev, bend); + if (err) { + /* + * Technically not an error, just means we're not + * supposed to accelerate this + */ + DPRINTK("failed to get device name\n"); + goto fail_nicname; + } + + /* + * Look up the device name in the list of NICs provided by + * driverlink to get the hardware type. 
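The valid Dom0 transitions listed in the comment above can be read as a small table, which is how netback_accel_frontend_changed() effectively behaves. An illustrative standalone sketch (not part of the patch) encoding just those documented transitions:

#include <stdio.h>

enum ex_state { EX_CLOSED, EX_INIT, EX_CONNECTED, EX_CLOSING, EX_NSTATES };

static const char *ex_names[] = { "Closed", "Init", "Connected", "Closing" };

/* valid[from][to] per the comment: Closed->Init, Closed->Closing,
 * Init->Connected, Init->Closing, Connected->Closing, Closing->Closed */
static const int ex_valid[EX_NSTATES][EX_NSTATES] = {
	[EX_CLOSED]    = { [EX_INIT] = 1, [EX_CLOSING] = 1 },
	[EX_INIT]      = { [EX_CONNECTED] = 1, [EX_CLOSING] = 1 },
	[EX_CONNECTED] = { [EX_CLOSING] = 1 },
	[EX_CLOSING]   = { [EX_CLOSED] = 1 },
};

int main(void)
{
	printf("%s->%s valid? %d\n", ex_names[EX_INIT], ex_names[EX_CONNECTED],
	       ex_valid[EX_INIT][EX_CONNECTED]);
	printf("%s->%s valid? %d\n", ex_names[EX_CONNECTED], ex_names[EX_INIT],
	       ex_valid[EX_CONNECTED][EX_INIT]);
	return 0;
}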
+ */ + err = netback_accel_sf_hwtype(bend); + if (err) { + /* + * Technically not an error, just means we're not + * supposed to accelerate this, probably belongs to + * some other backend + */ + DPRINTK("failed to match device name\n"); + goto fail_init_type; + } + + err = publish_frontend_name(dev); + if (err) + goto fail_publish; + + err = netback_accel_debugfs_create(bend); + if (err) + goto fail_debugfs; + + mutex_unlock(&bend->bend_mutex); + + err = setup_config_accel_watch(dev, bend); + if (err) + goto fail_config_watch; + + err = setup_domu_accel_watch(dev, bend); + if (err) + goto fail_domu_watch; + + /* + * Indicate to the other end that we're ready to start unless + * the watch has already fired. + */ + mutex_lock(&bend->bend_mutex); + if (bend->backend_state == XenbusStateClosed) { + bend->backend_state = XenbusStateInitialising; + net_accel_update_state(dev, XenbusStateInitialising); + } + mutex_unlock(&bend->bend_mutex); + + mutex_lock(&bend_list_mutex); + link_bend(bend); + mutex_unlock(&bend_list_mutex); + + return 0; + +fail_domu_watch: + + unregister_xenbus_watch(&bend->config_accel_watch); + kfree(bend->config_accel_watch.node); +fail_config_watch: + + /* + * Flush the scheduled work queue before freeing bend to get + * rid of any pending netback_accel_msg_rx_handler() + */ + flush_scheduled_work(); + + mutex_lock(&bend->bend_mutex); + net_accel_update_state(dev, XenbusStateUnknown); + netback_accel_debugfs_remove(bend); +fail_debugfs: + + unpublish_frontend_name(dev); +fail_publish: + + /* No need to reverse netback_accel_sf_hwtype. */ +fail_init_type: + + kfree(bend->nicname); +fail_nicname: + binfo->netback_accel_priv = NULL; + mutex_unlock(&bend->bend_mutex); + kfree(bend); + return err; +} + + +int netback_accel_remove(struct xenbus_device *dev) +{ + struct backend_info *binfo; + struct netback_accel *bend; + int frontend_state; + + binfo = dev_get_drvdata(&dev->dev); + bend = (struct netback_accel *) binfo->netback_accel_priv; + + DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend); + + BUG_ON(bend == NULL); + + mutex_lock(&bend_list_mutex); + unlink_bend(bend); + mutex_unlock(&bend_list_mutex); + + mutex_lock(&bend->bend_mutex); + + /* Reject any requests to connect. */ + bend->removing = 1; + + /* + * Switch to closing to tell the other end that we're going + * away. + */ + if (bend->backend_state != XenbusStateClosing) { + bend->backend_state = XenbusStateClosing; + net_accel_update_state(dev, XenbusStateClosing); + } + + frontend_state = (int)XenbusStateUnknown; + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", + &frontend_state); + + mutex_unlock(&bend->bend_mutex); + + /* + * Wait until this end goes to the closed state. This happens + * in response to the other end going to the closed state. + * Don't bother doing this if the other end is already closed + * because if it is then there is nothing to do. + */ + if (frontend_state != (int)XenbusStateClosed && + frontend_state != (int)XenbusStateUnknown) + wait_event(bend->state_wait_queue, + bend->backend_state == XenbusStateClosed); + + unregister_xenbus_watch(&bend->domu_accel_watch); + kfree(bend->domu_accel_watch.node); + + unregister_xenbus_watch(&bend->config_accel_watch); + kfree(bend->config_accel_watch.node); + + /* + * Flush the scheduled work queue before freeing bend to get + * rid of any pending netback_accel_msg_rx_handler() + */ + flush_scheduled_work(); + + mutex_lock(&bend->bend_mutex); + + /* Tear down the vnic if it was set up. 
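netback_accel_probe() above relies on the kernel's goto-unwind idiom: each fail_* label undoes exactly the steps completed before the failure, in reverse order. A compact standalone sketch of the pattern, with hypothetical step/undo helpers (the third step is forced to fail to show the unwind):

#include <stdio.h>

static int  step(const char *name, int fail) { printf("do   %s\n", name); return fail ? -1 : 0; }
static void undo(const char *name)           { printf("undo %s\n", name); }

static int ex_probe(void)
{
	int err;

	if ((err = step("alloc", 0)) != 0)   goto fail_alloc;
	if ((err = step("publish", 0)) != 0) goto fail_publish;
	if ((err = step("watch", 1)) != 0)   goto fail_watch; /* forced failure */
	return 0;

	/* Unwind strictly in reverse order of setup. */
 fail_watch:
	undo("publish");
 fail_publish:
	undo("alloc");
 fail_alloc:
	return err;
}

int main(void) { return ex_probe() ? 1 : 0; }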
*/ + if (bend->vnic_is_setup) { + bend->vnic_is_setup = 0; + cleanup_vnic(bend); + } + + bend->backend_state = XenbusStateUnknown; + net_accel_update_state(dev, XenbusStateUnknown); + + netback_accel_debugfs_remove(bend); + + unpublish_frontend_name(dev); + + kfree(bend->nicname); + + binfo->netback_accel_priv = NULL; + + mutex_unlock(&bend->bend_mutex); + + kfree(bend); + + return 0; +} + + +void netback_accel_shutdown_bends(void) +{ + mutex_lock(&bend_list_mutex); + /* + * I think we should have had a remove callback for all + * interfaces before being allowed to unload the module + */ + BUG_ON(bend_list != NULL); + mutex_unlock(&bend_list_mutex); +} + + +void netback_accel_set_closing(struct netback_accel *bend) +{ + + bend->backend_state = XenbusStateClosing; + net_accel_update_state((struct xenbus_device *)bend->hdev_data, + XenbusStateClosing); +} --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel.h @@ -0,0 +1,392 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NETBACK_ACCEL_H +#define NETBACK_ACCEL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "accel_shared_fifo.h" +#include "accel_msg_iface.h" +#include "accel_util.h" + +/************************************************************************** + * Datatypes + **************************************************************************/ + +#define NETBACK_ACCEL_DEFAULT_MAX_FILTERS (8) +#define NETBACK_ACCEL_DEFAULT_MAX_MCASTS (8) +#define NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES (384) +/* Variable to store module parameter for max_buf_pages */ +extern unsigned sfc_netback_max_pages; + +#define NETBACK_ACCEL_STATS 1 + +#if NETBACK_ACCEL_STATS +#define NETBACK_ACCEL_STATS_OP(x) x +#else +#define NETBACK_ACCEL_STATS_OP(x) +#endif + +/*! Statistics for a given backend */ +struct netback_accel_stats { + /*! Number of eventq wakeup events */ + u64 evq_wakeups; + /*! Number of eventq timeout events */ + u64 evq_timeouts; + /*! Number of filters used */ + u32 num_filters; + /*! Number of buffer pages registered */ + u32 num_buffer_pages; +}; + + +/* Debug fs nodes for each of the above stats */ +struct netback_accel_dbfs { + struct dentry *evq_wakeups; + struct dentry *evq_timeouts; + struct dentry *num_filters; + struct dentry *num_buffer_pages; +}; + + +/*! Resource limits for a given NIC */ +struct netback_accel_limits { + int max_filters; /*!< Max. number of filters to use. 
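The NETBACK_ACCEL_STATS_OP() macro defined above is the usual compile-time switch for statistics: when stats are enabled it passes its argument through as code, otherwise it expands to nothing and the counter updates vanish entirely. A minimal standalone illustration of the same trick:

#include <stdio.h>

#define EX_STATS 1            /* flip to 0 and the counter update disappears */

#if EX_STATS
#define EX_STATS_OP(x) x
#else
#define EX_STATS_OP(x)
#endif

static unsigned long long evq_wakeups;

int main(void)
{
	EX_STATS_OP(evq_wakeups++);  /* expands to `evq_wakeups++;` or to nothing */
	printf("wakeups=%llu\n", evq_wakeups);
	return 0;
}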
*/ + int max_mcasts; /*!< Max. number of mcast subscriptions */ + int max_buf_pages; /*!< Max. number of pages of NIC buffers */ +}; + + +/*! The state for an instance of the back end driver. */ +struct netback_accel { + /*! mutex to protect this state */ + struct mutex bend_mutex; + + /*! Watches on xenstore */ + struct xenbus_watch domu_accel_watch; + struct xenbus_watch config_accel_watch; + + /*! Pointer to whatever device cookie ties us in to the hypervisor */ + void *hdev_data; + + /*! FIFO indices. Next page is msg FIFOs */ + struct net_accel_shared_page *shared_page; + + /*! Defer control message processing */ + struct work_struct handle_msg; + + /*! Identifies other end VM and interface. */ + int far_end; + int vif_num; + + /*! To unmap the shared pages */ + void *sh_pages_unmap; + + /* Resource tracking */ + /*! Limits on H/W & Dom0 resources */ + struct netback_accel_limits quotas; + + /* Hardware resources */ + /*! The H/W type of associated NIC */ + enum net_accel_hw_type hw_type; + /*! State of allocation */ + int hw_state; + /*! How to set up the acceleration for this hardware */ + int (*accel_setup)(struct netback_accel *); + /*! And how to stop it. */ + void (*accel_shutdown)(struct netback_accel *); + + /*! The physical/real net_dev for this interface */ + struct net_device *net_dev; + + /*! Magic pointer to locate state in forwarding table */ + void *fwd_priv; + + /*! Message FIFO */ + sh_msg_fifo2 to_domU; + /*! Message FIFO */ + sh_msg_fifo2 from_domU; + + /*! General notification channel id */ + int msg_channel; + /*! General notification channel irq */ + int msg_channel_irq; + + /*! Event channel id dedicated to network packet interrupts. */ + int net_channel; + /*! Event channel irq dedicated to network packet interrupts */ + int net_channel_irq; + + /*! The MAC address the frontend goes by. */ + u8 mac[ETH_ALEN]; + /*! Driver name of associated NIC */ + char *nicname; + + /*! Array of pointers to buffer pages mapped */ + grant_handle_t *buffer_maps; + u64 *buffer_addrs; + /*! Index into buffer_maps */ + int buffer_maps_index; + /*! Max number of pages that domU is allowed/will request to map */ + int max_pages; + + /*! Pointer to hardware specific private area */ + void *accel_hw_priv; + + /*! Wait queue for changes in accelstate. */ + wait_queue_head_t state_wait_queue; + + /*! Current state of the frontend according to the xenbus + * watch. */ + XenbusState frontend_state; + + /*! Current state of this backend. */ + XenbusState backend_state; + + /*! Non-zero if the backend is being removed. */ + int removing; + + /*! Non-zero if setup_vnic has been called. */ + int vnic_is_setup; + +#if NETBACK_ACCEL_STATS + struct netback_accel_stats stats; +#endif +#if defined(CONFIG_DEBUG_FS) + char *dbfs_dir_name; + struct dentry *dbfs_dir; + struct netback_accel_dbfs dbfs; +#endif + + /*! List */ + struct netback_accel *next_bend; +}; + + +/* + * Values for netback_accel.hw_state. States of resource allocation + * we can go through + */ +/*! No hardware has yet been allocated. */ +#define NETBACK_ACCEL_RES_NONE (0) +/*! Hardware has been allocated. */ +#define NETBACK_ACCEL_RES_ALLOC (1) +#define NETBACK_ACCEL_RES_FILTER (2) +#define NETBACK_ACCEL_RES_HWINFO (3) + +/*! Filtering specification. This assumes that for VNIC support we + * will always want wildcard entries, so only specifies the + * destination IP/port + */ +struct netback_accel_filter_spec { + /*! Internal, used to access efx_vi API */ + void *filter_handle; + + /*!
Destination IP in network order */ + u32 destip_be; + /*! Destination port in network order */ + u16 destport_be; + /*! Mac address */ + u8 mac[ETH_ALEN]; + /*! TCP or UDP */ + u8 proto; +}; + + +/************************************************************************** + * From accel.c + **************************************************************************/ + +/*! \brief Start up all the acceleration plugins + * + * \return 0 on success, an errno on failure + */ +extern int netback_accel_init_accel(void); + +/*! \brief Shut down all the acceleration plugins + */ +extern void netback_accel_shutdown_accel(void); + + +/************************************************************************** + * From accel_fwd.c + **************************************************************************/ + +/*! \brief Init the forwarding infrastructure + * \return 0 on success, or -ENOMEM if it couldn't get memory for the + * forward table + */ +extern int netback_accel_init_fwd(void); + +/*! \brief Shut down the forwarding and free memory. */ +extern void netback_accel_shutdown_fwd(void); + +/*! Initialise each nic port's forwarding table */ +extern void *netback_accel_init_fwd_port(void); +extern void netback_accel_shutdown_fwd_port(void *fwd_priv); + +/*! \brief Add an entry to the forwarding table. + * \param mac : MAC address, used as hash key + * \param context : value to associate with key (can be NULL, see + * netback_accel_fwd_set_context) + * \return 0 on success, -ENOMEM if table was full and could not grow it + */ +extern int netback_accel_fwd_add(const __u8 *mac, void *context, + void *fwd_priv); + +/*! \brief Remove an entry from the forwarding table. + * \param mac : the MAC address to remove + * \return nothing: it is not an error if the mac was not in the table + */ +extern void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv); + +/*! \brief Set the context pointer for an existing fwd table entry. + * \param mac : key that is already present in the table + * \param context : new value to associate with key + * \return 0 on success, -ENOENT if mac not present in table. + */ +extern int netback_accel_fwd_set_context(const __u8 *mac, void *context, + void *fwd_priv); + +/************************************************************************** + * From accel_msg.c + **************************************************************************/ + + +/*! \brief Send the start-of-day message that handshakes with the VNIC + * and tells it its MAC address. + * + * \param bend The back end driver data structure + * \param version The version of communication to use, e.g. NET_ACCEL_MSG_VERSION + */ +extern void netback_accel_msg_tx_hello(struct netback_accel *bend, + unsigned version); + +/*! \brief Send a "there's a new local mac address" message + * + * \param bend The back end driver data structure for the vnic to send + * the message to + * \param mac Pointer to the new mac address + */ +extern void netback_accel_msg_tx_new_localmac(struct netback_accel *bend, + const void *mac); + +/*! \brief Send a "a mac address that was local has gone away" message + * + * \param bend The back end driver data structure for the vnic to send + * the message to + * \param mac Pointer to the old mac address + */ +extern void netback_accel_msg_tx_old_localmac(struct netback_accel *bend, + const void *mac); + +extern void netback_accel_set_interface_state(struct netback_accel *bend, + int up); + +/*! \brief Process the message queue for a bend that has just + * interrupted.
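The forwarding-table API declared above is used in two phases: setup_vnic() first adds the MAC with a NULL context so the entry exists but forwards nowhere, and the context is attached once the hardware is ready. A standalone sketch of that calling sequence, using trivial single-entry stubs (the ex_* names are hypothetical, not the driver's):

#include <stdio.h>
#include <string.h>

static unsigned char ex_mac_key[6];
static void *ex_ctx;
static int ex_used;

static int ex_fwd_add(const unsigned char *mac, void *ctx)
{
	if (ex_used)
		return -1;                       /* table full */
	memcpy(ex_mac_key, mac, 6); ex_ctx = ctx; ex_used = 1;
	return 0;
}
static int ex_fwd_set_context(const unsigned char *mac, void *ctx)
{
	if (!ex_used || memcmp(ex_mac_key, mac, 6))
		return -2;                       /* -ENOENT-ish */
	ex_ctx = ctx;
	return 0;
}
static void ex_fwd_remove(const unsigned char *mac)
{
	if (ex_used && !memcmp(ex_mac_key, mac, 6))
		ex_used = 0;                     /* absence is not an error */
}

int main(void)
{
	unsigned char mac[6] = { 0x00, 0x0c, 0x29, 0x01, 0x02, 0x03 };
	int dummy_vnic;

	ex_fwd_add(mac, NULL);                /* reserved: lookups find no target */
	ex_fwd_set_context(mac, &dummy_vnic); /* hardware ready: start forwarding */
	ex_fwd_remove(mac);
	printf("entry in use after remove: %d\n", ex_used);
	return 0;
}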
+ * + * Demultiplexes an interrupt from the front end driver, taking + * messages from the fifo and taking appropriate action. + * + * \param bend The back end driver data structure + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +extern void netback_accel_msg_rx_handler(struct work_struct *arg); +#else +extern void netback_accel_msg_rx_handler(void *bend_void); +#endif + +/************************************************************************** + * From accel_xenbus.c + **************************************************************************/ +/*! List of all the bends currently in existence. */ +extern struct netback_accel *bend_list; +extern struct mutex bend_list_mutex; + +/*! \brief Probe a new network interface. */ +extern int netback_accel_probe(struct xenbus_device *dev); + +/*! \brief Remove a network interface. */ +extern int netback_accel_remove(struct xenbus_device *dev); + +/*! \brief Shutdown all accelerator backends */ +extern void netback_accel_shutdown_bends(void); + +/*! \brief Initiate the xenbus state teardown handshake */ +extern void netback_accel_set_closing(struct netback_accel *bend); + +/************************************************************************** + * From accel_debugfs.c + **************************************************************************/ +/*! Global statistics */ +struct netback_accel_global_stats { + /*! Number of TX packets seen through driverlink */ + u64 dl_tx_packets; + /*! Number of TX packets seen through driverlink we didn't like */ + u64 dl_tx_bad_packets; + /*! Number of RX packets seen through driverlink */ + u64 dl_rx_packets; + /*! Number of mac addresses we are forwarding to */ + u32 num_fwds; +}; + +/*! Debug fs entries for each of the above stats */ +struct netback_accel_global_dbfs { + struct dentry *dl_tx_packets; + struct dentry *dl_tx_bad_packets; + struct dentry *dl_rx_packets; + struct dentry *num_fwds; +}; + +#if NETBACK_ACCEL_STATS +extern struct netback_accel_global_stats global_stats; +#endif + +/*! \brief Initialise the debugfs root and populate with global stats */ +extern void netback_accel_debugfs_init(void); + +/*! \brief Remove our debugfs root directory */ +extern void netback_accel_debugfs_fini(void); + +/*! \brief Add per-bend statistics to debug fs */ +extern int netback_accel_debugfs_create(struct netback_accel *bend); +/*! \brief Remove per-bend statistics from debug fs */ +extern int netback_accel_debugfs_remove(struct netback_accel *bend); + +#endif /* NETBACK_ACCEL_H */ + + --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel_solarflare.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel_solarflare.h @@ -0,0 +1,88 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
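Looking back at the version-conditional netback_accel_msg_rx_handler() declarations above: on 2.6.20+ kernels the work handler receives the work_struct itself and must recover the enclosing bend with container_of, whereas older kernels passed the context pointer directly. A plain-C rendering of the container_of pattern (an illustrative sketch, not the driver's code):

#include <stdio.h>
#include <stddef.h>

#define ex_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ex_work { int pending; };
struct ex_bend { int id; struct ex_work handle_msg; };

static void ex_msg_rx_handler(struct ex_work *work)
{
	/* Step from the embedded member back to the containing state. */
	struct ex_bend *bend = ex_container_of(work, struct ex_bend, handle_msg);
	printf("handling messages for bend %d\n", bend->id);
}

int main(void)
{
	struct ex_bend bend = { .id = 7 };
	ex_msg_rx_handler(&bend.handle_msg); /* what schedule_work() would invoke */
	return 0;
}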
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NETBACK_ACCEL_SOLARFLARE_H +#define NETBACK_ACCEL_SOLARFLARE_H + +#include "accel.h" +#include "accel_msg_iface.h" + +#include "driverlink_api.h" + +#define MAX_NICS 5 +#define MAX_PORTS 2 + + +extern int netback_accel_sf_init(void); +extern void netback_accel_sf_shutdown(void); +extern int netback_accel_sf_hwtype(struct netback_accel *bend); + +extern int netback_accel_sf_char_init(void); +extern void netback_accel_sf_char_shutdown(void); + +extern int netback_accel_setup_vnic_hw(struct netback_accel *bend); +extern void netback_accel_shutdown_vnic_hw(struct netback_accel *bend); + +extern int netback_accel_add_buffers(struct netback_accel *bend, int pages, + int log2_pages, u32 *grants, + u32 *buf_addr_out); +extern int netback_accel_remove_buffers(struct netback_accel *bend); + + +/* Add a filter for the specified IP/port to the backend */ +extern int +netback_accel_filter_check_add(struct netback_accel *bend, + struct netback_accel_filter_spec *filt); +/* Remove a filter entry for the specific device and IP/port */ +extern +void netback_accel_filter_remove_index(struct netback_accel *bend, + int filter_index); +extern +void netback_accel_filter_remove_spec(struct netback_accel *bend, + struct netback_accel_filter_spec *filt); + +/* This is designed to look a bit like a skb */ +struct netback_pkt_buf { + union { + unsigned char *raw; + } mac; + union { + struct iphdr *iph; + struct arphdr *arph; + unsigned char *raw; + } nh; + int protocol; +}; + +/*! \brief Handle a received packet: insert fast path filters as necessary + * \param skb The packet buffer + */ +extern void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv); + +/*! \brief Handle a transmitted packet: update fast path filters as necessary + * \param skb The packet buffer + */ +extern void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv); + +#endif /* NETBACK_ACCEL_SOLARFLARE_H */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel_solarflare.c +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel_solarflare.c @@ -0,0 +1,1236 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include "common.h" + +#include "accel.h" +#include "accel_solarflare.h" +#include "accel_msg_iface.h" +#include "accel_util.h" + +#include "accel_cuckoo_hash.h" + +#include "ci/driver/resource/efx_vi.h" + +#include "ci/efrm/nic_table.h" +#include "ci/efhw/public.h" + +#include +#include +#include +#include + +#include "driverlink_api.h" + +#define SF_XEN_RX_USR_BUF_SIZE 2048 + +struct falcon_bend_accel_priv { + struct efx_vi_state *efx_vih; + + /*! Array of pointers to dma_map state, used so VNIC can + * request their removal in a single message + */ + struct efx_vi_dma_map_state **dma_maps; + /*! Index into dma_maps */ + int dma_maps_index; + + /*! Serialises access to filters */ + spinlock_t filter_lock; + /*! Bitmap of which filters are free */ + unsigned long free_filters; + /*! Used for index normalisation */ + u32 filter_idx_mask; + struct netback_accel_filter_spec *fspecs; + cuckoo_hash_table filter_hash_table; + + u32 txdmaq_gnt; + u32 rxdmaq_gnt; + u32 doorbell_gnt; + u32 evq_rptr_gnt; + u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES]; + u32 evq_npages; +}; + +/* Forward declaration */ +static int netback_accel_filter_init(struct netback_accel *); +static void netback_accel_filter_shutdown(struct netback_accel *); + +/************************************************************************** + * + * Driverlink stuff + * + **************************************************************************/ + +struct driverlink_port { + struct list_head link; + enum net_accel_hw_type type; + struct net_device *net_dev; + struct efx_dl_device *efx_dl_dev; + void *fwd_priv; +}; + +static struct list_head dl_ports; + +/* This mutex protects global state, such as the dl_ports list */ +DEFINE_MUTEX(accel_mutex); + +static int init_done = 0; + +/* The DL callbacks */ + + +#if defined(EFX_USE_FASTCALL) +static enum efx_veto fastcall +#else +static enum efx_veto +#endif +bend_dl_tx_packet(struct efx_dl_device *efx_dl_dev, + struct sk_buff *skb) +{ + struct driverlink_port *port = efx_dl_dev->priv; + + BUG_ON(port == NULL); + + NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++); + if (skb_mac_header_was_set(skb)) + netback_accel_tx_packet(skb, port->fwd_priv); + else { + DPRINTK("Ignoring packet with missing mac address\n"); + NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_bad_packets++); + } + return EFX_ALLOW_PACKET; +} + +/* EFX_USE_FASTCALL */ +#if defined(EFX_USE_FASTCALL) +static enum efx_veto fastcall +#else +static enum efx_veto +#endif +bend_dl_rx_packet(struct efx_dl_device *efx_dl_dev, + const char *pkt_buf, int pkt_len) +{ + struct driverlink_port *port = efx_dl_dev->priv; + struct netback_pkt_buf pkt; + struct ethhdr *eh; + + BUG_ON(port == NULL); + + pkt.mac.raw = (char *)pkt_buf; + pkt.nh.raw = (char *)pkt_buf + ETH_HLEN; + eh = (struct ethhdr *)pkt_buf; + pkt.protocol = eh->h_proto; + + NETBACK_ACCEL_STATS_OP(global_stats.dl_rx_packets++); + netback_accel_rx_packet(&pkt, port->fwd_priv); + return EFX_ALLOW_PACKET; +} + + +/* Callbacks we'd like to get from the netdriver through driverlink */ +struct efx_dl_callbacks bend_dl_callbacks = + { + .tx_packet = bend_dl_tx_packet, + .rx_packet = bend_dl_rx_packet, + }; + + +static struct netback_accel_hooks accel_hooks = { + THIS_MODULE, + 
&netback_accel_probe, + &netback_accel_remove +}; + + +/* Driver link probe - register our callbacks */ +static int bend_dl_probe(struct efx_dl_device *efx_dl_dev, + const struct net_device *net_dev, + const struct efx_dl_device_info *dev_info, + const char* silicon_rev) +{ + int rc; + enum net_accel_hw_type type; + struct driverlink_port *port; + + DPRINTK("%s: %s\n", __FUNCTION__, silicon_rev); + + if (strcmp(silicon_rev, "falcon/a1") == 0) + type = NET_ACCEL_MSG_HWTYPE_FALCON_A; + else if (strcmp(silicon_rev, "falcon/b0") == 0) + type = NET_ACCEL_MSG_HWTYPE_FALCON_B; + else { + EPRINTK("%s: unsupported silicon %s\n", __FUNCTION__, + silicon_rev); + rc = -EINVAL; + goto fail1; + } + + port = kmalloc(sizeof(struct driverlink_port), GFP_KERNEL); + if (port == NULL) { + EPRINTK("%s: no memory for dl probe\n", __FUNCTION__); + rc = -ENOMEM; + goto fail1; + } + + port->efx_dl_dev = efx_dl_dev; + efx_dl_dev->priv = port; + + port->fwd_priv = netback_accel_init_fwd_port(); + if (port->fwd_priv == NULL) { + EPRINTK("%s: failed to set up forwarding for port\n", + __FUNCTION__); + rc = -ENOMEM; + goto fail2; + } + + rc = efx_dl_register_callbacks(efx_dl_dev, &bend_dl_callbacks); + if (rc != 0) { + EPRINTK("%s: register_callbacks failed\n", __FUNCTION__); + goto fail3; + } + + port->type = type; + port->net_dev = (struct net_device *)net_dev; + + mutex_lock(&accel_mutex); + list_add(&port->link, &dl_ports); + mutex_unlock(&accel_mutex); + + rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 0, + port->net_dev->name, &accel_hooks); + + if (rc < 0) { + EPRINTK("Xen netback accelerator version mismatch\n"); + goto fail4; + } else if (rc > 0) { + /* + * In future may want to add backwards compatibility + * and accept certain subsets of previous versions + */ + EPRINTK("Xen netback accelerator version mismatch\n"); + goto fail4; + } + + return 0; + + fail4: + mutex_lock(&accel_mutex); + list_del(&port->link); + mutex_unlock(&accel_mutex); + + efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks); + fail3: + netback_accel_shutdown_fwd_port(port->fwd_priv); + fail2: + efx_dl_dev->priv = NULL; + kfree(port); + fail1: + return rc; +} + + +static void bend_dl_remove(struct efx_dl_device *efx_dl_dev) +{ + struct driverlink_port *port; + + DPRINTK("Unregistering driverlink callbacks.\n"); + + mutex_lock(&accel_mutex); + + port = (struct driverlink_port *)efx_dl_dev->priv; + + BUG_ON(list_empty(&dl_ports)); + BUG_ON(port == NULL); + BUG_ON(port->efx_dl_dev != efx_dl_dev); + + netback_disconnect_accelerator(0, port->net_dev->name); + + list_del(&port->link); + + mutex_unlock(&accel_mutex); + + efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks); + netback_accel_shutdown_fwd_port(port->fwd_priv); + + efx_dl_dev->priv = NULL; + kfree(port); + + return; +} + + +static struct efx_dl_driver bend_dl_driver = + { + .name = "SFC Xen backend", + .probe = bend_dl_probe, + .remove = bend_dl_remove, + }; + + +int netback_accel_sf_init(void) +{ + int rc, nic_i; + struct efhw_nic *nic; + + INIT_LIST_HEAD(&dl_ports); + + rc = efx_dl_register_driver(&bend_dl_driver); + /* If we couldn't find the NET driver, give up */ + if (rc == -ENOENT) + return rc; + + if (rc == 0) { + EFRM_FOR_EACH_NIC(nic_i, nic) + falcon_nic_set_rx_usr_buf_size(nic, + SF_XEN_RX_USR_BUF_SIZE); + } + + init_done = (rc == 0); + return rc; +} + + +void netback_accel_sf_shutdown(void) +{ + if (!init_done) + return; + DPRINTK("Unregistering driverlink driver\n"); + + /* + * This will trigger removal callbacks for all the devices, which + * 
will unregister their callbacks, disconnect from netfront, etc. + */ + efx_dl_unregister_driver(&bend_dl_driver); +} + + +int netback_accel_sf_hwtype(struct netback_accel *bend) +{ + struct driverlink_port *port; + + mutex_lock(&accel_mutex); + + list_for_each_entry(port, &dl_ports, link) { + if (strcmp(bend->nicname, port->net_dev->name) == 0) { + bend->hw_type = port->type; + bend->accel_setup = netback_accel_setup_vnic_hw; + bend->accel_shutdown = netback_accel_shutdown_vnic_hw; + bend->fwd_priv = port->fwd_priv; + bend->net_dev = port->net_dev; + mutex_unlock(&accel_mutex); + return 0; + } + } + + mutex_unlock(&accel_mutex); + + EPRINTK("Failed to identify backend device '%s' with a NIC\n", + bend->nicname); + + return -ENOENT; +} + + +/**************************************************************************** + * Resource management code + ***************************************************************************/ + +static int alloc_page_state(struct netback_accel *bend, int max_pages) +{ + struct falcon_bend_accel_priv *accel_hw_priv; + + if (max_pages < 0 || max_pages > bend->quotas.max_buf_pages) { + EPRINTK("%s: invalid max_pages: %d\n", __FUNCTION__, max_pages); + return -EINVAL; + } + + accel_hw_priv = kzalloc(sizeof(struct falcon_bend_accel_priv), + GFP_KERNEL); + if (accel_hw_priv == NULL) { + EPRINTK("%s: no memory for accel_hw_priv\n", __FUNCTION__); + return -ENOMEM; + } + + accel_hw_priv->dma_maps = kzalloc + (sizeof(struct efx_vi_dma_map_state **) * + (max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ), GFP_KERNEL); + if (accel_hw_priv->dma_maps == NULL) { + EPRINTK("%s: no memory for dma_maps\n", __FUNCTION__); + kfree(accel_hw_priv); + return -ENOMEM; + } + + bend->buffer_maps = kzalloc(sizeof(struct vm_struct *) * max_pages, + GFP_KERNEL); + if (bend->buffer_maps == NULL) { + EPRINTK("%s: no memory for buffer_maps\n", __FUNCTION__); + kfree(accel_hw_priv->dma_maps); + kfree(accel_hw_priv); + return -ENOMEM; + } + + bend->buffer_addrs = kzalloc(sizeof(u64) * max_pages, GFP_KERNEL); + if (bend->buffer_addrs == NULL) { + kfree(bend->buffer_maps); + kfree(accel_hw_priv->dma_maps); + kfree(accel_hw_priv); + return -ENOMEM; + } + + bend->accel_hw_priv = accel_hw_priv; + + return 0; +} + + +static int free_page_state(struct netback_accel *bend) +{ + struct falcon_bend_accel_priv *accel_hw_priv; + + DPRINTK("%s: %p\n", __FUNCTION__, bend); + + accel_hw_priv = bend->accel_hw_priv; + + if (accel_hw_priv) { + kfree(accel_hw_priv->dma_maps); + kfree(bend->buffer_maps); + kfree(bend->buffer_addrs); + kfree(accel_hw_priv); + bend->accel_hw_priv = NULL; + bend->max_pages = 0; + } + + return 0; +} + + +/* The timeout event callback for the event q */ +static void bend_evq_timeout(void *context, int is_timeout) +{ + struct netback_accel *bend = (struct netback_accel *)context; + if (is_timeout) { + /* Pass event to vnic front end driver */ + VPRINTK("timeout event to %d\n", bend->net_channel); + NETBACK_ACCEL_STATS_OP(bend->stats.evq_timeouts++); + notify_remote_via_irq(bend->net_channel_irq); + } else { + /* It's a wakeup event, used by Falcon */ + VPRINTK("wakeup to %d\n", bend->net_channel); + NETBACK_ACCEL_STATS_OP(bend->stats.evq_wakeups++); + notify_remote_via_irq(bend->net_channel_irq); + } +} + + +/* + * Create the eventq and associated gubbins for communication with the + * front end vnic driver + */ +static int ef_get_vnic(struct netback_accel *bend) +{ + struct falcon_bend_accel_priv *accel_hw_priv; + int rc = 0; + + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_NONE); + + /* Allocate 
page related state and accel_hw_priv */ + rc = alloc_page_state(bend, bend->max_pages); + if (rc != 0) { + EPRINTK("Failed to allocate page state: %d\n", rc); + return rc; + } + + accel_hw_priv = bend->accel_hw_priv; + + rc = efx_vi_alloc(&accel_hw_priv->efx_vih, bend->net_dev->ifindex); + if (rc != 0) { + EPRINTK("%s: efx_vi_alloc failed %d\n", __FUNCTION__, rc); + free_page_state(bend); + return rc; + } + + rc = efx_vi_eventq_register_callback(accel_hw_priv->efx_vih, + bend_evq_timeout, + bend); + if (rc != 0) { + EPRINTK("%s: register_callback failed %d\n", __FUNCTION__, rc); + efx_vi_free(accel_hw_priv->efx_vih); + free_page_state(bend); + return rc; + } + + bend->hw_state = NETBACK_ACCEL_RES_ALLOC; + + return 0; +} + + +static void ef_free_vnic(struct netback_accel *bend) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC); + + efx_vi_eventq_kill_callback(accel_hw_priv->efx_vih); + + DPRINTK("Hardware is freeable. Will proceed.\n"); + + efx_vi_free(accel_hw_priv->efx_vih); + accel_hw_priv->efx_vih = NULL; + + VPRINTK("Free page state...\n"); + free_page_state(bend); + + bend->hw_state = NETBACK_ACCEL_RES_NONE; +} + + +static inline void ungrant_or_crash(grant_ref_t gntref, int domain) { + if (net_accel_ungrant_page(gntref) == -EBUSY) + net_accel_shutdown_remote(domain); +} + + +static void netback_accel_release_hwinfo(struct netback_accel *bend) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + int i; + + DPRINTK("Remove dma q grants %d %d\n", accel_hw_priv->txdmaq_gnt, + accel_hw_priv->rxdmaq_gnt); + ungrant_or_crash(accel_hw_priv->txdmaq_gnt, bend->far_end); + ungrant_or_crash(accel_hw_priv->rxdmaq_gnt, bend->far_end); + + DPRINTK("Remove doorbell grant %d\n", accel_hw_priv->doorbell_gnt); + ungrant_or_crash(accel_hw_priv->doorbell_gnt, bend->far_end); + + if (bend->hw_type == NET_ACCEL_MSG_HWTYPE_FALCON_A) { + DPRINTK("Remove rptr grant %d\n", accel_hw_priv->evq_rptr_gnt); + ungrant_or_crash(accel_hw_priv->evq_rptr_gnt, bend->far_end); + } + + for (i = 0; i < accel_hw_priv->evq_npages; i++) { + DPRINTK("Remove evq grant %d\n", accel_hw_priv->evq_mem_gnts[i]); + ungrant_or_crash(accel_hw_priv->evq_mem_gnts[i], bend->far_end); + } + + bend->hw_state = NETBACK_ACCEL_RES_FILTER; + + return; +} + + +static int ef_bend_hwinfo_falcon_common(struct netback_accel *bend, + struct net_accel_hw_falcon_b *hwinfo) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + struct efx_vi_hw_resource_metadata res_mdata; + struct efx_vi_hw_resource res_array[EFX_VI_HW_RESOURCE_MAXSIZE]; + int rc, len = EFX_VI_HW_RESOURCE_MAXSIZE, i, pfn = 0; + unsigned long txdmaq_pfn = 0, rxdmaq_pfn = 0; + + rc = efx_vi_hw_resource_get_phys(accel_hw_priv->efx_vih, &res_mdata, + res_array, &len); + if (rc != 0) { + DPRINTK("%s: resource_get_phys returned %d\n", + __FUNCTION__, rc); + return rc; + } + + hwinfo->nic_arch = res_mdata.nic_arch; + hwinfo->nic_variant = res_mdata.nic_variant; + hwinfo->nic_revision = res_mdata.nic_revision; + + hwinfo->evq_order = res_mdata.evq_order; + hwinfo->evq_offs = res_mdata.evq_offs; + hwinfo->evq_capacity = res_mdata.evq_capacity; + hwinfo->instance = res_mdata.instance; + hwinfo->rx_capacity = res_mdata.rx_capacity; + hwinfo->tx_capacity = res_mdata.tx_capacity; + + VPRINTK("evq_order %d evq_offs %d evq_cap %d inst %d rx_cap %d tx_cap %d\n", + hwinfo->evq_order, hwinfo->evq_offs, hwinfo->evq_capacity, + hwinfo->instance, hwinfo->rx_capacity, hwinfo->tx_capacity); + 
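A note on the grant handling that follows: each page passed to the frontend is granted one at a time, and a mid-loop failure must drop every grant issued so far (the fail3 path below). A standalone sketch of that unwind shape, with hypothetical ex_grant/ex_ungrant stubs in place of net_accel_grant_page()/ungrant_or_crash() (the stub fails on the fourth page to exercise the unwind):

#include <stdio.h>

static int ex_grant(unsigned long pfn)
{
	if ((pfn & 3) == 3)
		return -1;                        /* simulated grant failure */
	return (int)(100 + (pfn & 0xff));         /* fake grant reference */
}
static void ex_ungrant(int ref) { printf("ungrant %d\n", ref); }

static int ex_grant_evq(unsigned long first_pfn, int npages, int *refs)
{
	int i, rc = 0;
	for (i = 0; i < npages; i++) {
		rc = ex_grant(first_pfn + i);
		if (rc < 0)
			goto unwind;
		refs[i] = rc;
	}
	return 0;
 unwind:
	while (--i >= 0)    /* drop the grants issued so far, newest first */
		ex_ungrant(refs[i]);
	return rc;
}

int main(void)
{
	int refs[4];
	printf("result %d\n", ex_grant_evq(0x1000, 4, refs));
	return 0;
}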
+ for (i = 0; i < len; i++) { + struct efx_vi_hw_resource *res = &(res_array[i]); + switch (res->type) { + case EFX_VI_HW_RESOURCE_TXDMAQ: + txdmaq_pfn = page_to_pfn(virt_to_page(res->address)); + break; + case EFX_VI_HW_RESOURCE_RXDMAQ: + rxdmaq_pfn = page_to_pfn(virt_to_page(res->address)); + break; + case EFX_VI_HW_RESOURCE_EVQTIMER: + break; + case EFX_VI_HW_RESOURCE_EVQRPTR: + case EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET: + hwinfo->evq_rptr = res->address; + break; + case EFX_VI_HW_RESOURCE_EVQMEMKVA: + accel_hw_priv->evq_npages = 1 << res_mdata.evq_order; + pfn = page_to_pfn(virt_to_page(res->address)); + break; + case EFX_VI_HW_RESOURCE_BELLPAGE: + hwinfo->doorbell_mfn = res->address; + break; + default: + EPRINTK("%s: Unknown hardware resource type %d\n", + __FUNCTION__, res->type); + break; + } + } + + VPRINTK("Passing txdmaq page pfn %lx\n", txdmaq_pfn); + rc = net_accel_grant_page(bend->hdev_data, pfn_to_mfn(txdmaq_pfn), 0); + if (rc < 0) + goto fail0; + accel_hw_priv->txdmaq_gnt = hwinfo->txdmaq_gnt = rc; + + VPRINTK("Passing rxdmaq page pfn %lx\n", rxdmaq_pfn); + rc = net_accel_grant_page(bend->hdev_data, pfn_to_mfn(rxdmaq_pfn), 0); + if (rc < 0) + goto fail1; + accel_hw_priv->rxdmaq_gnt = hwinfo->rxdmaq_gnt = rc; + + VPRINTK("Passing doorbell page mfn %x\n", hwinfo->doorbell_mfn); + /* Make the relevant H/W pages mappable by the far end */ + rc = net_accel_grant_page(bend->hdev_data, hwinfo->doorbell_mfn, 1); + if (rc < 0) + goto fail2; + accel_hw_priv->doorbell_gnt = hwinfo->doorbell_gnt = rc; + + /* Now do the same for the memory pages */ + /* Convert the page + length we got back for the evq to grants. */ + for (i = 0; i < accel_hw_priv->evq_npages; i++) { + rc = net_accel_grant_page(bend->hdev_data, pfn_to_mfn(pfn), 0); + if (rc < 0) + goto fail3; + accel_hw_priv->evq_mem_gnts[i] = hwinfo->evq_mem_gnts[i] = rc; + + VPRINTK("Got grant %u for evq pfn %x\n", hwinfo->evq_mem_gnts[i], + pfn); + pfn++; + } + + return 0; + + fail3: + for (i = i - 1; i >= 0; i--) { + ungrant_or_crash(accel_hw_priv->evq_mem_gnts[i], bend->far_end); + } + ungrant_or_crash(accel_hw_priv->doorbell_gnt, bend->far_end); + fail2: + ungrant_or_crash(accel_hw_priv->rxdmaq_gnt, bend->far_end); + fail1: + ungrant_or_crash(accel_hw_priv->txdmaq_gnt, bend->far_end); + fail0: + return rc; +} + + +static int ef_bend_hwinfo_falcon_a(struct netback_accel *bend, + struct net_accel_hw_falcon_a *hwinfo) +{ + int rc, i; + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + + if ((rc = ef_bend_hwinfo_falcon_common(bend, &hwinfo->common)) != 0) + return rc; + + /* + * Note that unlike the above, where the message field is the + * page number, here evq_rptr is the entire address because + * it is currently a pointer into the densely mapped timer page. 
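As the comment above says, the Falcon A evq_rptr field carries a full address rather than a page number, so the grant below covers the page containing it and the frontend re-applies the in-page offset. A minimal sketch of the split, assuming 4 KiB pages and a hypothetical rptr value:

#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_PAGE_MASK  ((1UL << EX_PAGE_SHIFT) - 1)

int main(void)
{
	unsigned long evq_rptr = 0x0003f8a0;              /* hypothetical value */
	unsigned long page = evq_rptr >> EX_PAGE_SHIFT;   /* what gets granted */
	unsigned long offs = evq_rptr & EX_PAGE_MASK;     /* rptr's offset in it */

	printf("grant page %#lx, rptr offset %#lx\n", page, offs);
	return 0;
}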
+ */ + VPRINTK("Passing evq_rptr pfn %x for rptr %x\n", + hwinfo->common.evq_rptr >> PAGE_SHIFT, + hwinfo->common.evq_rptr); + rc = net_accel_grant_page(bend->hdev_data, + hwinfo->common.evq_rptr >> PAGE_SHIFT, 0); + if (rc < 0) { + /* Undo ef_bend_hwinfo_falcon_common() */ + ungrant_or_crash(accel_hw_priv->txdmaq_gnt, bend->far_end); + ungrant_or_crash(accel_hw_priv->rxdmaq_gnt, bend->far_end); + ungrant_or_crash(accel_hw_priv->doorbell_gnt, bend->far_end); + for (i = 0; i < accel_hw_priv->evq_npages; i++) { + ungrant_or_crash(accel_hw_priv->evq_mem_gnts[i], + bend->far_end); + } + return rc; + } + + accel_hw_priv->evq_rptr_gnt = hwinfo->evq_rptr_gnt = rc; + VPRINTK("evq_rptr_gnt got %d\n", hwinfo->evq_rptr_gnt); + + return 0; +} + + +static int ef_bend_hwinfo_falcon_b(struct netback_accel *bend, + struct net_accel_hw_falcon_b *hwinfo) +{ + return ef_bend_hwinfo_falcon_common(bend, hwinfo); +} + + +/* + * Fill in the message with a description of the hardware resources, based on + * the H/W type + */ +static int netback_accel_hwinfo(struct netback_accel *bend, + struct net_accel_msg_hw *msgvi) +{ + int rc = 0; + + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER); + + msgvi->type = bend->hw_type; + switch (bend->hw_type) { + case NET_ACCEL_MSG_HWTYPE_FALCON_A: + rc = ef_bend_hwinfo_falcon_a(bend, &msgvi->resources.falcon_a); + break; + case NET_ACCEL_MSG_HWTYPE_FALCON_B: + rc = ef_bend_hwinfo_falcon_b(bend, &msgvi->resources.falcon_b); + break; + case NET_ACCEL_MSG_HWTYPE_NONE: + /* Nothing to do. The slow path should just work. */ + break; + } + + if (rc == 0) + bend->hw_state = NETBACK_ACCEL_RES_HWINFO; + + return rc; +} + + +/* Allocate hardware resources and make them available to the client domain */ +int netback_accel_setup_vnic_hw(struct netback_accel *bend) +{ + struct net_accel_msg msg; + int err; + + /* Allocate the event queue, VI and so on. 
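The hw_state values form a strict progression (NONE -> ALLOC -> FILTER -> HWINFO), and netback_accel_shutdown_vnic_hw() below unwinds from whichever stage was reached by switching on the state and deliberately falling through. A compact standalone sketch of that design (illustrative, not the driver's code):

#include <stdio.h>

enum ex_res { EX_RES_NONE, EX_RES_ALLOC, EX_RES_FILTER, EX_RES_HWINFO };

static void ex_teardown(enum ex_res state)
{
	switch (state) {
	case EX_RES_HWINFO:
		printf("release hw info\n");
		/* fall through */
	case EX_RES_FILTER:
		printf("free filters\n");
		/* fall through */
	case EX_RES_ALLOC:
		printf("free vnic\n");
		/* fall through */
	case EX_RES_NONE:
		break;
	}
}

int main(void)
{
	ex_teardown(EX_RES_FILTER); /* frees the filters, then the vnic */
	return 0;
}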
*/ + err = ef_get_vnic(bend); + if (err) { + EPRINTK("Failed to allocate hardware resource for bend: " + "error %d\n", err); + return err; + } + + /* Set up the filter management */ + err = netback_accel_filter_init(bend); + if (err) { + EPRINTK("Filter setup failed, error %d\n", err); + ef_free_vnic(bend); + return err; + } + + net_accel_msg_init(&msg, NET_ACCEL_MSG_SETHW); + + /* + * Extract the low-level hardware info we will actually pass to the + * other end, and set up the grants/ioremap permissions needed + */ + err = netback_accel_hwinfo(bend, &msg.u.hw); + + if (err != 0) { + netback_accel_filter_shutdown(bend); + ef_free_vnic(bend); + return err; + } + + /* Send the message; this is a reply to a hello-reply */ + err = net_accel_msg_reply_notify(bend->shared_page, + bend->msg_channel_irq, + &bend->to_domU, &msg); + + /* + * The message should succeed as it's logically a reply and we + * guarantee space for replies, but a misbehaving frontend + * could cause it to fail, so be tolerant + */ + if (err != 0) { + netback_accel_release_hwinfo(bend); + netback_accel_filter_shutdown(bend); + ef_free_vnic(bend); + } + + return err; +} + + +/* Free hardware resources */ +void netback_accel_shutdown_vnic_hw(struct netback_accel *bend) +{ + /* + * Only try to release resources if accel_hw_priv was setup, + * otherwise there is nothing to do as we're on "null-op" + * acceleration + */ + switch (bend->hw_state) { + case NETBACK_ACCEL_RES_HWINFO: + VPRINTK("Release hardware resources\n"); + netback_accel_release_hwinfo(bend); + /* deliberate drop through */ + case NETBACK_ACCEL_RES_FILTER: + VPRINTK("Free filters...\n"); + netback_accel_filter_shutdown(bend); + /* deliberate drop through */ + case NETBACK_ACCEL_RES_ALLOC: + VPRINTK("Free vnic...\n"); + ef_free_vnic(bend); + /* deliberate drop through */ + case NETBACK_ACCEL_RES_NONE: + break; + default: + BUG(); + } +} + +/************************************************************************** + * + * Buffer table stuff + * + **************************************************************************/ + +/* + * Undo any allocation that netback_accel_msg_rx_buffer_map() has made + * if it fails half way through + */ +static inline void buffer_map_cleanup(struct netback_accel *bend, int i) +{ + while (i > 0) { + i--; + bend->buffer_maps_index--; + net_accel_unmap_device_page(bend->hdev_data, + bend->buffer_maps[bend->buffer_maps_index], + bend->buffer_addrs[bend->buffer_maps_index]); + } +} + + +int netback_accel_add_buffers(struct netback_accel *bend, int pages, int log2_pages, + u32 *grants, u32 *buf_addr_out) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + unsigned long long addr_array[NET_ACCEL_MSG_MAX_PAGE_REQ]; + int rc, i, index; + u64 dev_bus_addr; + + /* Make sure we can't overflow the dma_maps array */ + if (accel_hw_priv->dma_maps_index >= + bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ) { + EPRINTK("%s: too many buffer table allocations: %d %d\n", + __FUNCTION__, accel_hw_priv->dma_maps_index, + bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ); + return -EINVAL; + } + + /* Make sure we can't overflow the buffer_maps array */ + if (bend->buffer_maps_index + pages > bend->max_pages) { + EPRINTK("%s: too many pages mapped: %d + %d > %d\n", + __FUNCTION__, bend->buffer_maps_index, + pages, bend->max_pages); + return -EINVAL; + } + + for (i = 0; i < pages; i++) { + VPRINTK("%s: mapping page %d\n", __FUNCTION__, i); + rc = net_accel_map_device_page + (bend->hdev_data, grants[i], +
&bend->buffer_maps[bend->buffer_maps_index], + &dev_bus_addr); + + if (rc != 0) { + EPRINTK("error in net_accel_map_device_page\n"); + buffer_map_cleanup(bend, i); + return rc; + } + + bend->buffer_addrs[bend->buffer_maps_index] = dev_bus_addr; + + bend->buffer_maps_index++; + + addr_array[i] = dev_bus_addr; + } + + VPRINTK("%s: mapping dma addresses to vih %p\n", __FUNCTION__, + accel_hw_priv->efx_vih); + + index = accel_hw_priv->dma_maps_index; + if ((rc = efx_vi_dma_map_addrs(accel_hw_priv->efx_vih, addr_array, pages, + &(accel_hw_priv->dma_maps[index]))) < 0) { + EPRINTK("error in dma_map_pages\n"); + buffer_map_cleanup(bend, i); + return rc; + } + + accel_hw_priv->dma_maps_index++; + NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages += pages); + + //DPRINTK("%s: getting map address\n", __FUNCTION__); + + *buf_addr_out = efx_vi_dma_get_map_addr(accel_hw_priv->efx_vih, + accel_hw_priv->dma_maps[index]); + + //DPRINTK("%s: done\n", __FUNCTION__); + + return 0; +} + + +int netback_accel_remove_buffers(struct netback_accel *bend) +{ + /* Only try to free buffers if accel_hw_priv was setup */ + if (bend->hw_state != NETBACK_ACCEL_RES_NONE) { + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + int i; + + efx_vi_reset(accel_hw_priv->efx_vih); + + while (accel_hw_priv->dma_maps_index > 0) { + accel_hw_priv->dma_maps_index--; + i = accel_hw_priv->dma_maps_index; + efx_vi_dma_unmap_addrs(accel_hw_priv->efx_vih, + accel_hw_priv->dma_maps[i]); + } + + while (bend->buffer_maps_index > 0) { + VPRINTK("Unmapping granted buffer %d\n", + bend->buffer_maps_index); + bend->buffer_maps_index--; + i = bend->buffer_maps_index; + net_accel_unmap_device_page(bend->hdev_data, + bend->buffer_maps[i], + bend->buffer_addrs[i]); + } + + NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages = 0); + } + + return 0; +} + +/************************************************************************** + * + * Filter stuff + * + **************************************************************************/ + +static int netback_accel_filter_init(struct netback_accel *bend) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + int i, rc; + + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC); + + spin_lock_init(&accel_hw_priv->filter_lock); + + if ((rc = cuckoo_hash_init(&accel_hw_priv->filter_hash_table, + 5 /* space for 32 filters */, 8)) != 0) { + EPRINTK("Failed to initialise filter hash table\n"); + return rc; + } + + accel_hw_priv->fspecs = kzalloc(sizeof(struct netback_accel_filter_spec) * + bend->quotas.max_filters, + GFP_KERNEL); + + if (accel_hw_priv->fspecs == NULL) { + EPRINTK("No memory for filter specs.\n"); + cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table); + return -ENOMEM; + } + + for (i = 0; i < bend->quotas.max_filters; i++) { + accel_hw_priv->free_filters |= (1 << i); + } + + /* Base mask on highest set bit in max_filters */ + accel_hw_priv->filter_idx_mask = (1 << fls(bend->quotas.max_filters)) - 1; + VPRINTK("filter setup: max is %x mask is %x\n", + bend->quotas.max_filters, accel_hw_priv->filter_idx_mask); + + bend->hw_state = NETBACK_ACCEL_RES_FILTER; + + return 0; +} + + +static inline void make_filter_key(cuckoo_hash_ip_key *key, + struct netback_accel_filter_spec *filt) + +{ + key->local_ip = filt->destip_be; + key->local_port = filt->destport_be; + key->proto = filt->proto; +} + + +static inline +void netback_accel_free_filter(struct falcon_bend_accel_priv *accel_hw_priv, + int filter) +{ + cuckoo_hash_ip_key filter_key; + + if 
(!(accel_hw_priv->free_filters & (1 << filter))) { + efx_vi_filter_stop(accel_hw_priv->efx_vih, + accel_hw_priv->fspecs[filter].filter_handle); + make_filter_key(&filter_key, &(accel_hw_priv->fspecs[filter])); + if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table, + (cuckoo_hash_key *)&filter_key)) { + EPRINTK("%s: Couldn't find filter to remove from table\n", + __FUNCTION__); + BUG(); + } + } +} + + +static void netback_accel_filter_shutdown(struct netback_accel *bend) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + int i; + unsigned long flags; + + BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER); + + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags); + + BUG_ON(accel_hw_priv->fspecs == NULL); + + for (i = 0; i < bend->quotas.max_filters; i++) { + netback_accel_free_filter(accel_hw_priv, i); + } + + kfree(accel_hw_priv->fspecs); + accel_hw_priv->fspecs = NULL; + accel_hw_priv->free_filters = 0; + + cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table); + + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags); + + bend->hw_state = NETBACK_ACCEL_RES_ALLOC; +} + + +/*! Suggest a filter to replace when we want to insert a new one and have + * none free. + */ +static unsigned get_victim_filter(struct netback_accel *bend) +{ + /* + * We could attempt to get really clever, and may do at some + * point, but random replacement is v. cheap and low on + * pathological worst cases. + */ + unsigned index, cycles; + + rdtscl(cycles); + + /* + * Some doubt about the quality of the bottom few bits, so + * throw 'em * away + */ + index = (cycles >> 4) & ((struct falcon_bend_accel_priv *) + bend->accel_hw_priv)->filter_idx_mask; + /* + * We don't enforce that the number of filters is a power of + * two, but the masking gets us to within one subtraction of a + * valid index + */ + if (index >= bend->quotas.max_filters) + index -= bend->quotas.max_filters; + DPRINTK("backend %s->%d has no free filters. Filter %d will be evicted\n", + bend->nicname, bend->far_end, index); + return index; +} + + +/* Add a filter for the specified IP/port to the backend */ +int +netback_accel_filter_check_add(struct netback_accel *bend, + struct netback_accel_filter_spec *filt) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + struct netback_accel_filter_spec *fs; + unsigned filter_index; + unsigned long flags; + int rc, recycling = 0; + cuckoo_hash_ip_key filter_key, evict_key; + + BUG_ON(filt->proto != IPPROTO_TCP && filt->proto != IPPROTO_UDP); + + DPRINTK("Will add %s filter for dst ip %08x and dst port %d\n", + (filt->proto == IPPROTO_TCP) ? "TCP" : "UDP", + be32_to_cpu(filt->destip_be), be16_to_cpu(filt->destport_be)); + + spin_lock_irqsave(&accel_hw_priv->filter_lock, flags); + /* + * Check to see if we're already filtering this IP address and + * port. 
Happens if you insert a filter mid-stream as there + * are many packets backed up to be delivered to dom0 already + */ + make_filter_key(&filter_key, filt); + if (cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table, + (cuckoo_hash_key *)(&filter_key), + &filter_index)) { + DPRINTK("Found matching filter %d already in table\n", + filter_index); + rc = -1; + goto out; + } + + if (accel_hw_priv->free_filters == 0) { + filter_index = get_victim_filter(bend); + recycling = 1; + } else { + filter_index = __ffs(accel_hw_priv->free_filters); + clear_bit(filter_index, &accel_hw_priv->free_filters); + } + + fs = &accel_hw_priv->fspecs[filter_index]; + + if (recycling) { + DPRINTK("Removing filter index %d handle %p\n", filter_index, + fs->filter_handle); + + if ((rc = efx_vi_filter_stop(accel_hw_priv->efx_vih, + fs->filter_handle)) != 0) { + EPRINTK("Couldn't clear NIC filter table entry %d\n", rc); + } + + make_filter_key(&evict_key, fs); + if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table, + (cuckoo_hash_key *)&evict_key)) { + EPRINTK("Couldn't find filter to remove from table\n"); + BUG(); + } + NETBACK_ACCEL_STATS_OP(bend->stats.num_filters--); + } + + /* Update the filter spec with new details */ + *fs = *filt; + + if ((rc = cuckoo_hash_add(&accel_hw_priv->filter_hash_table, + (cuckoo_hash_key *)&filter_key, filter_index, + 1)) != 0) { + EPRINTK("Error (%d) adding filter to table\n", rc); + accel_hw_priv->free_filters |= (1 << filter_index); + goto out; + } + + rc = efx_vi_filter(accel_hw_priv->efx_vih, filt->proto, filt->destip_be, + filt->destport_be, + (struct filter_resource_t **)&fs->filter_handle); + + if (rc != 0) { + EPRINTK("Hardware filter insertion failed. Error %d\n", rc); + accel_hw_priv->free_filters |= (1 << filter_index); + cuckoo_hash_remove(&accel_hw_priv->filter_hash_table, + (cuckoo_hash_key *)&filter_key); + rc = -1; + goto out; + } + + NETBACK_ACCEL_STATS_OP(bend->stats.num_filters++); + + VPRINTK("%s: success index %d handle %p\n", __FUNCTION__, filter_index, + fs->filter_handle); + + rc = filter_index; + out: + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags); + return rc; +} + + +/* Remove a filter entry for the specific device and IP/port */ +static void netback_accel_filter_remove(struct netback_accel *bend, + int filter_index) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + + BUG_ON(accel_hw_priv->free_filters & (1 << filter_index)); + netback_accel_free_filter(accel_hw_priv, filter_index); + accel_hw_priv->free_filters |= (1 << filter_index); +} + + +/* Remove a filter entry for the specific device and IP/port */ +void netback_accel_filter_remove_spec(struct netback_accel *bend, + struct netback_accel_filter_spec *filt) +{ + struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv; + unsigned filter_found; + unsigned long flags; + cuckoo_hash_ip_key filter_key; + struct netback_accel_filter_spec *fs; + + if (filt->proto == IPPROTO_TCP) { + DPRINTK("Remove TCP filter for dst ip %08x and dst port %d\n", + be32_to_cpu(filt->destip_be), + be16_to_cpu(filt->destport_be)); + } else if (filt->proto == IPPROTO_UDP) { + DPRINTK("Remove UDP filter for dst ip %08x and dst port %d\n", + be32_to_cpu(filt->destip_be), + be16_to_cpu(filt->destport_be)); + } else { + /* + * This could be provoked by an evil frontend, so can't + * BUG(), but harmless as it should fail tests below + */ + DPRINTK("Non-TCP/UDP filter dst ip %08x and dst port %d\n", + be32_to_cpu(filt->destip_be), + be16_to_cpu(filt->destport_be)); + } + + 
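/* Look the entry up under the filter lock and do the full-spec compare below before freeing; the hash lookup alone could match a colliding entry */ +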
spin_lock_irqsave(&accel_hw_priv->filter_lock, flags); + + make_filter_key(&filter_key, filt); + if (!cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table, + (cuckoo_hash_key *)(&filter_key), + &filter_found)) { + EPRINTK("Couldn't find matching filter already in table\n"); + goto out; + } + + /* Do a full check to make sure we've not had a hash collision */ + fs = &accel_hw_priv->fspecs[filter_found]; + if (fs->destip_be == filt->destip_be && + fs->destport_be == filt->destport_be && + fs->proto == filt->proto && + !memcmp(fs->mac, filt->mac, ETH_ALEN)) { + netback_accel_filter_remove(bend, filter_found); + } else { + EPRINTK("Entry in hash table does not match filter spec\n"); + goto out; + } + + out: + spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags); +} --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/accel_msg.c +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/accel_msg.c @@ -0,0 +1,392 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include + +#include "accel.h" +#include "accel_msg_iface.h" +#include "accel_util.h" +#include "accel_solarflare.h" + +/* Send a HELLO to front end to start things off */ +void netback_accel_msg_tx_hello(struct netback_accel *bend, unsigned version) +{ + unsigned long lock_state; + struct net_accel_msg *msg = + net_accel_msg_start_send(bend->shared_page, + &bend->to_domU, &lock_state); + /* The queue _cannot_ be full, we're the first users. 
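If it somehow is full (a bug, or a misbehaving frontend), net_accel_msg_start_send() returns NULL and the hello is simply not sent. 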
*/ + EPRINTK_ON(msg == NULL); + + if (msg != NULL) { + net_accel_msg_init(msg, NET_ACCEL_MSG_HELLO); + msg->u.hello.version = version; + msg->u.hello.max_pages = bend->quotas.max_buf_pages; + VPRINTK("Sending hello to channel %d\n", bend->msg_channel); + net_accel_msg_complete_send_notify(bend->shared_page, + &bend->to_domU, + &lock_state, + bend->msg_channel_irq); + } +} + +/* Send a local mac message to vnic */ +static void netback_accel_msg_tx_localmac(struct netback_accel *bend, + int type, const void *mac) +{ + unsigned long lock_state; + struct net_accel_msg *msg; + DECLARE_MAC_BUF(buf); + + BUG_ON(bend == NULL || mac == NULL); + + VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac)); + + msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU, + &lock_state); + + if (msg != NULL) { + net_accel_msg_init(msg, NET_ACCEL_MSG_LOCALMAC); + msg->u.localmac.flags = type; + memcpy(msg->u.localmac.mac, mac, ETH_ALEN); + net_accel_msg_complete_send_notify(bend->shared_page, + &bend->to_domU, + &lock_state, + bend->msg_channel_irq); + } else { + /* + * TODO if this happens we may leave a domU + * fastpathing packets when they should be delivered + * locally. Solution is get domU to timeout entries + * in its fastpath lookup table when it receives no RX + * traffic + */ + EPRINTK("%s: saw full queue, may need ARP timer to recover\n", + __FUNCTION__); + } +} + +/* Send an add local mac message to vnic */ +void netback_accel_msg_tx_new_localmac(struct netback_accel *bend, + const void *mac) +{ + netback_accel_msg_tx_localmac(bend, NET_ACCEL_MSG_ADD, mac); +} + + +static int netback_accel_msg_rx_buffer_map(struct netback_accel *bend, + struct net_accel_msg *msg) +{ + int log2_pages, rc; + + /* Can only allocate in power of two */ + log2_pages = log2_ge(msg->u.mapbufs.pages, 0); + if (msg->u.mapbufs.pages != pow2(log2_pages)) { + EPRINTK("%s: Can only alloc bufs in power of 2 sizes (%d)\n", + __FUNCTION__, msg->u.mapbufs.pages); + rc = -EINVAL; + goto err_out; + } + + /* + * Sanity. 
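An over-long request would otherwise overrun the fixed-size addr_array in netback_accel_add_buffers(). 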
Assumes NET_ACCEL_MSG_MAX_PAGE_REQ is same for + * both directions/domains + */ + if (msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ) { + EPRINTK("%s: too many pages in a single message: %d %d\n", + __FUNCTION__, msg->u.mapbufs.pages, + NET_ACCEL_MSG_MAX_PAGE_REQ); + rc = -EINVAL; + goto err_out; + } + + if ((rc = netback_accel_add_buffers(bend, msg->u.mapbufs.pages, + log2_pages, msg->u.mapbufs.grants, + &msg->u.mapbufs.buf)) < 0) { + goto err_out; + } + + msg->id |= NET_ACCEL_MSG_REPLY; + + return 0; + + err_out: + EPRINTK("%s: err_out\n", __FUNCTION__); + msg->id |= NET_ACCEL_MSG_ERROR | NET_ACCEL_MSG_REPLY; + return rc; +} + + +/* Hint from frontend that one of our filters is out of date */ +static int netback_accel_process_fastpath(struct netback_accel *bend, + struct net_accel_msg *msg) +{ + struct netback_accel_filter_spec spec; + + if (msg->u.fastpath.flags & NET_ACCEL_MSG_REMOVE) { + /* + * Would be nice to BUG() this but would leave us + * vulnerable to naughty frontend + */ + EPRINTK_ON(msg->u.fastpath.flags & NET_ACCEL_MSG_ADD); + + memcpy(spec.mac, msg->u.fastpath.mac, ETH_ALEN); + spec.destport_be = msg->u.fastpath.port; + spec.destip_be = msg->u.fastpath.ip; + spec.proto = msg->u.fastpath.proto; + + netback_accel_filter_remove_spec(bend, &spec); + } + + return 0; +} + + +/* Flow control for message queues */ +inline void set_queue_not_full(struct netback_accel *bend) +{ + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B, + (unsigned long *)&bend->shared_page->aflags)) + notify_remote_via_irq(bend->msg_channel_irq); + else + VPRINTK("queue not full bit already set, not signalling\n"); +} + + +/* Flow control for message queues */ +inline void set_queue_full(struct netback_accel *bend) +{ + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B, + (unsigned long *)&bend->shared_page->aflags)) + notify_remote_via_irq(bend->msg_channel_irq); + else + VPRINTK("queue full bit already set, not signalling\n"); +} + + +void netback_accel_set_interface_state(struct netback_accel *bend, int up) +{ + bend->shared_page->net_dev_up = up; + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B, + (unsigned long *)&bend->shared_page->aflags)) + notify_remote_via_irq(bend->msg_channel_irq); + else + VPRINTK("interface up/down bit already set, not signalling\n"); +} + + +static int check_rx_hello_version(unsigned version) +{ + /* Should only happen if there's been a version mismatch */ + BUG_ON(version == NET_ACCEL_MSG_VERSION); + + if (version > NET_ACCEL_MSG_VERSION) { + /* Newer protocol, we must refuse */ + return -EPROTO; + } + + if (version < NET_ACCEL_MSG_VERSION) { + /* + * We are newer, so have discretion to accept if we + * wish. For now however, just reject + */ + return -EPROTO; + } + + return -EINVAL; +} + + +static int process_rx_msg(struct netback_accel *bend, + struct net_accel_msg *msg) +{ + int err = 0; + + switch (msg->id) { + case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO: + /* Reply to a HELLO; mark ourselves as connected */ + DPRINTK("got Hello reply, version %.8x\n", + msg->u.hello.version); + + /* + * Check that we've not successfully done this + * already. 
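Once accel_setup() has run, hw_state moves on from NETBACK_ACCEL_RES_NONE, so a duplicate reply is refused with -EPROTO below. 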
NB no check at the moment that this reply + * comes after we've actually sent a HELLO as that's + * not possible with the current code structure + */ + if (bend->hw_state != NETBACK_ACCEL_RES_NONE) + return -EPROTO; + + /* Store max_pages for accel_setup */ + if (msg->u.hello.max_pages > bend->quotas.max_buf_pages) { + EPRINTK("More pages than quota allows (%d > %d)\n", + msg->u.hello.max_pages, + bend->quotas.max_buf_pages); + /* Force it down to the quota */ + msg->u.hello.max_pages = bend->quotas.max_buf_pages; + } + bend->max_pages = msg->u.hello.max_pages; + + /* Set up the hardware visible to the other end */ + err = bend->accel_setup(bend); + if (err) { + /* This is fatal */ + DPRINTK("Hello gave accel_setup error %d\n", err); + netback_accel_set_closing(bend); + } else { + /* + * Now add the context so that packet + * forwarding will commence + */ + netback_accel_fwd_set_context(bend->mac, bend, + bend->fwd_priv); + } + break; + case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_ERROR: + EPRINTK("got Hello error, versions us:%.8x them:%.8x\n", + NET_ACCEL_MSG_VERSION, msg->u.hello.version); + + if (bend->hw_state != NETBACK_ACCEL_RES_NONE) + return -EPROTO; + + if (msg->u.hello.version != NET_ACCEL_MSG_VERSION) { + /* Error is due to version mismatch */ + err = check_rx_hello_version(msg->u.hello.version); + if (err == 0) { + /* + * It's OK to be compatible, send + * another hello with compatible version + */ + netback_accel_msg_tx_hello + (bend, msg->u.hello.version); + } else { + /* + * Tell frontend that we're not going to + * send another HELLO by going to Closing. + */ + netback_accel_set_closing(bend); + } + } + break; + case NET_ACCEL_MSG_MAPBUF: + VPRINTK("Got mapped buffers request %d\n", + msg->u.mapbufs.reqid); + + if (bend->hw_state == NETBACK_ACCEL_RES_NONE) + return -EPROTO; + + /* + * Frontend wants a buffer table entry for the + * supplied pages + */ + err = netback_accel_msg_rx_buffer_map(bend, msg); + if (net_accel_msg_reply_notify(bend->shared_page, + bend->msg_channel_irq, + &bend->to_domU, msg)) { + /* + * This is fatal as we can't tell the frontend + * about the problem through the message + * queue, and so would otherwise stalemate + */ + netback_accel_set_closing(bend); + } + break; + case NET_ACCEL_MSG_FASTPATH: + DPRINTK("Got fastpath request\n"); + + if (bend->hw_state == NETBACK_ACCEL_RES_NONE) + return -EPROTO; + + err = netback_accel_process_fastpath(bend, msg); + break; + default: + EPRINTK("Huh? Message code is %x\n", msg->id); + err = -EPROTO; + break; + } + return err; +} + + +/* Demultiplex an IRQ from the frontend driver. */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +void netback_accel_msg_rx_handler(struct work_struct *arg) +#else +void netback_accel_msg_rx_handler(void *bend_void) +#endif +{ + struct net_accel_msg msg; + int err, queue_was_full = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) + struct netback_accel *bend = + container_of(arg, struct netback_accel, handle_msg); +#else + struct netback_accel *bend = (struct netback_accel *)bend_void; +#endif + + mutex_lock(&bend->bend_mutex); + + /* + * This happens when the shared pages have been unmapped, but + * the workqueue not flushed yet + */ + if (bend->shared_page == NULL) + goto done; + + if ((bend->shared_page->aflags & + NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK) != 0) { + if (bend->shared_page->aflags & + NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL) { + /* We've been told there may now be space. 
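Just clear the flag so that the frontend can raise it again later. 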
*/ + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B, + (unsigned long *)&bend->shared_page->aflags); + } + + if (bend->shared_page->aflags & + NET_ACCEL_MSG_AFLAGS_QUEUEUFULL) { + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B, + (unsigned long *)&bend->shared_page->aflags); + queue_was_full = 1; + } + } + + while ((err = net_accel_msg_recv(bend->shared_page, &bend->from_domU, + &msg)) == 0) { + err = process_rx_msg(bend, &msg); + + if (err != 0) { + EPRINTK("%s: Error %d\n", __FUNCTION__, err); + goto err; + } + } + + err: + /* There will be space now if we can make any. */ + if (queue_was_full) + set_queue_not_full(bend); + done: + mutex_unlock(&bend->bend_mutex); + + return; +} --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/Makefile +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/Makefile @@ -0,0 +1,12 @@ +EXTRA_CFLAGS += -Idrivers/xen/sfc_netback -Idrivers/xen/sfc_netutil -Idrivers/xen/netback -Idrivers/net/sfc +EXTRA_CFLAGS += -D__ci_driver__ +EXTRA_CFLAGS += -DEFX_USE_KCOMPAT +EXTRA_CFLAGS += -Werror + +ifdef GCOV +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV +endif + +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) := sfc_netback.o + +sfc_netback-objs := accel.o accel_fwd.o accel_msg.o accel_solarflare.o accel_xenbus.o accel_debugfs.o --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat.h @@ -0,0 +1,53 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr + * \brief Compatibility layer. Provides definitions of fundamental + * types and definitions that are used throughout CI source + * code. It does not introduce any link time dependencies, + * or include any unnecessary system headers. + */ +/*! \cidoxg_include_ci */ + +#ifndef __CI_COMPAT_H__ +#define __CI_COMPAT_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + + +#ifdef __cplusplus +} +#endif + +#endif /* __CI_COMPAT_H__ */ + +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/tools/log.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/tools/log.h @@ -0,0 +1,269 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. 
+ * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr + * \brief Functions for logging and pretty-printing. + * \date 2002/08/07 + */ + +/*! \cidoxg_include_ci_tools */ + +#ifndef __CI_TOOLS_LOG_H__ +#define __CI_TOOLS_LOG_H__ + +#include + + +/********************************************************************** + * Logging. + */ + +/* size of internal log buffer */ +#define CI_LOG_MAX_LINE 512 +/* uses of ci_log must ensure that all trace messages are shorter than this */ +#define CI_LOG_MAX_MSG_LENGTH (CI_LOG_MAX_LINE-50) + +extern void ci_vlog(const char* fmt, va_list args) CI_HF; +extern void ci_log(const char* fmt, ...) CI_PRINTF_LIKE(1,2) CI_HF; + + /*! Set the prefix for log messages. + ** + ** Uses the storage pointed to by \em prefix. Therefore \em prefix must + ** be allocated on the heap, or statically. + */ +extern void ci_set_log_prefix(const char* prefix) CI_HF; + +typedef void (*ci_log_fn_t)(const char* msg); +extern ci_log_fn_t ci_log_fn CI_HV; + +/* Log functions. */ +extern void ci_log_null(const char* msg) CI_HF; +extern void ci_log_stderr(const char* msg) CI_HF; +extern void ci_log_stdout(const char* msg) CI_HF; +extern void ci_log_syslog(const char* msg) CI_HF; + +/*! Call the following to install special logging behaviours. 
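Each of these redirects output by substituting a replacement function into ci_log_fn; ci_log_uniquify() below shows the pattern. 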
*/ +extern void ci_log_buffer_till_fail(void) CI_HF; +extern void ci_log_buffer_till_exit(void) CI_HF; + +extern void __ci_log_unique(const char* msg) CI_HF; +extern ci_log_fn_t __ci_log_unique_fn CI_HV; +ci_inline void ci_log_uniquify(void) { + if( ci_log_fn != __ci_log_unique ) { + __ci_log_unique_fn = ci_log_fn; + ci_log_fn = __ci_log_unique; + } +} + +extern void ci_log_file(const char* msg) CI_HF; +extern int ci_log_file_fd CI_HV; + +extern void __ci_log_nth(const char* msg) CI_HF; +extern ci_log_fn_t __ci_log_nth_fn CI_HV; +extern int ci_log_nth_n CI_HV; /* default 100 */ +ci_inline void ci_log_nth(void) { + if( ci_log_fn != __ci_log_nth ) { + __ci_log_nth_fn = ci_log_fn; + ci_log_fn = __ci_log_nth; + } +} + +extern int ci_log_level CI_HV; + +extern int ci_log_options CI_HV; +#define CI_LOG_PID 0x1 +#define CI_LOG_TID 0x2 +#define CI_LOG_TIME 0x4 +#define CI_LOG_DELTA 0x8 + +/********************************************************************** + * Used to define which mode we are in + */ +#if (defined(_WIN32) && !defined(__KERNEL__)) +typedef enum { + ci_log_md_NULL=0, + ci_log_md_ioctl, + ci_log_md_stderr, + ci_log_md_stdout, + ci_log_md_file, + ci_log_md_serial, + ci_log_md_syslog, + ci_log_md_pidfile +} ci_log_mode_t; +extern ci_log_mode_t ci_log_mode; +#endif + +/********************************************************************** + * Pretty-printing. + */ + +extern char ci_printable_char(char c) CI_HF; + +extern void (*ci_hex_dump_formatter)(char* buf, const ci_octet* s, + int i, int off, int len) CI_HV; +extern void ci_hex_dump_format_octets(char*,const ci_octet*,int,int,int) CI_HF; +extern void ci_hex_dump_format_dwords(char*,const ci_octet*,int,int,int) CI_HF; + +extern void ci_hex_dump_row(char* buf, volatile const void* s, int len, + ci_ptr_arith_t address) CI_HF; + /*!< A row contains up to 16 bytes. Row starts at [address & 15u], so + ** therefore [len + (address & 15u)] must be <= 16. + */ + +extern void ci_hex_dump(ci_log_fn_t, volatile const void*, + int len, ci_ptr_arith_t address) CI_HF; + +extern int ci_hex_dump_to_raw(const char* src_hex, void* buf, + unsigned* addr_out_opt, int* skip) CI_HF; + /*!< Recovers raw data from a single line of a hex dump. [buf] must be at + ** least 16 bytes long. Returns the number of bytes written to [buf] (in + ** range 1 -> 16), or -1 if [src_hex] doesn't contain hex data. Does not + ** cope with missing bytes at the start of a line. + */ + +extern int ci_format_eth_addr(char* buf, const void* eth_mac_addr, + char sep) CI_HF; + /*!< This will write 18 characters to [buf] including terminating null. + ** Returns number of bytes written excluding null. If [sep] is zero, ':' + ** is used. + */ + +extern int ci_parse_eth_addr(void* eth_mac_addr, + const char* str, char sep) CI_HF; + /*!< If [sep] is zero, absolutely any separator is accepted (even + ** inconsistent separators). Returns 0 on success, -1 on error. + */ + +extern int ci_format_ip4_addr(char* buf, unsigned addr_be32) CI_HF; + /*!< Formats the IP address (in network endian) in dotted-quad. Returns + ** the number of bytes written (up to 15), excluding the null. [buf] + ** must be at least 16 bytes long. + */ + +#if defined(__unix__) && ! 
defined(__KERNEL__) +extern int ci_format_select_set(char* s, int len_s, int nfds, const fd_set*); +extern int ci_format_select(char* s, int len_s, + int nfds, const fd_set* rds, const fd_set* wrs, + const fd_set* exs, struct timeval* timeout); +#endif + + +/********************************************************************** + * Error checking. + */ + +extern void (*ci_fail_stop_fn)(void) CI_HV; + +extern void ci_fail_stop(void) CI_HF; +extern void ci_fail_hang(void) CI_HF; +extern void ci_fail_bomb(void) CI_HF; +extern void ci_backtrace(void) CI_HF; + +#if defined __linux__ && !defined __KERNEL__ +extern void ci_fail_abort (void) CI_HF; +#endif + +#ifdef __GNUC__ +extern void +__ci_fail(const char*, ...) CI_PRINTF_LIKE(1,2) CI_HF; +#else +# if _PREFAST_ + extern void _declspec(noreturn) __ci_fail(const char* fmt, ...); +# else + extern void __ci_fail(const char* fmt, ...); +# endif + +#endif + +#define ci_warn(x) \ + do{ ci_log("WARN at %s:%d", __FILE__, __LINE__); }while(0) + +#define ci_fail(x) \ + do{ ci_log("FAIL at %s:%d", __FILE__, __LINE__); __ci_fail x; }while(0) + +extern void __ci_sys_fail(const char* fn, int rc, + const char* file, int line) CI_HF; +#define ci_sys_fail(fn, rc) __ci_sys_fail(fn, rc, __FILE__, __LINE__) + +/********************************************************************** + * Logging to buffer (src/citools/log_buffer.c) + */ + +/*! Divert ci_log() messages to the log buffer + * normally they go to the system console */ +extern void ci_log_buffer_till_fail(void) CI_HF; + +/*! Dump the contents of the log buffer to the system console */ +extern void ci_log_buffer_dump(void) CI_HF; + + +/********************************************************************** + * Some useful pretty-printing. + */ + +#ifdef __linux__ +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s%s%s" + +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \ + (((x) & MSG_OOB ) ? "OOB " :""), \ + (((x) & MSG_PEEK ) ? "PEEK " :""), \ + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \ + (((x) & MSG_EOR ) ? "EOR " :""), \ + (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \ + (((x) & MSG_TRUNC ) ? "TRUNC " :""), \ + (((x) & MSG_WAITALL ) ? "WAITALL " :""), \ + (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \ + (((x) & MSG_NOSIGNAL ) ? "NOSIGNAL " :""), \ + (((x) & MSG_ERRQUEUE ) ? "ERRQUEUE " :""), \ + (((x) & MSG_CONFIRM ) ? "CONFIRM " :"") +#endif + +#ifdef _WIN32 +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s" + +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \ + (((x) & MSG_OOB ) ? "OOB " :""), \ + (((x) & MSG_PEEK ) ? "PEEK " :""), \ + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :"") +#endif + +#ifdef __sun__ +# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s" + +# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \ + (((x) & MSG_OOB ) ? "OOB " :""), \ + (((x) & MSG_PEEK ) ? "PEEK " :""), \ + (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \ + (((x) & MSG_EOR ) ? "EOR " :""), \ + (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \ + (((x) & MSG_TRUNC ) ? "TRUNC " :""), \ + (((x) & MSG_WAITALL ) ? "WAITALL " :""), \ + (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \ + (((x) & MSG_NOTIFICATION) ? "NOTIFICATION" :"") +#endif + +#endif /* __CI_TOOLS_LOG_H__ */ +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/tools/config.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/tools/config.h @@ -0,0 +1,49 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. 
+ * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*! \cidoxg_include_ci_tools */ + +#ifndef __CI_TOOLS_CONFIG_H__ +#define __CI_TOOLS_CONFIG_H__ + + +/********************************************************************** + * Debugging. + */ + +#define CI_INCLUDE_ASSERT_VALID 0 + +/* Set non-zero to allow info about who has allocated what to appear in + * /proc/drivers/level5/mem. + * However - Note that doing so can lead to segfault when you unload the + * driver, and other weirdness. i.e. I don't think the code for it is quite + * right (written by Oktet, hacked by gel), but it does work well enough to be + * useful. + */ +#define CI_MEMLEAK_DEBUG_ALLOC_TABLE 0 + + +#endif /* __CI_TOOLS_CONFIG_H__ */ +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/tools/sysdep.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/tools/sysdep.h @@ -0,0 +1,132 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*! \cidoxg_include_ci_tools */ + +#ifndef __CI_TOOLS_SYSDEP_H__ +#define __CI_TOOLS_SYSDEP_H__ + +/* Make this header self-sufficient */ +#include +#include +#include + + +/********************************************************************** + * Platform dependencies. + */ + +#if defined(__KERNEL__) + +# if defined(__linux__) +# include +# elif defined(_WIN32) +# include +# elif defined(__sun__) +# include +# else +# error Unknown platform. +# endif + +#elif defined(_WIN32) + +# include + +#elif defined(__unix__) + +# include + +#else + +# error Unknown platform. + +#endif + +#if defined(__linux__) +/*! Linux sendfile() support enable/disable. 
*/ +# define CI_HAVE_SENDFILE /* provide sendfile i/f */ + +# define CI_HAVE_OS_NOPAGE +#endif + +#if defined(__sun__) +# define CI_HAVE_SENDFILE /* provide sendfile i/f */ +# define CI_HAVE_SENDFILEV /* provide sendfilev i/f */ + +# define CI_IOCTL_SENDFILE /* use efrm CI_SENDFILEV ioctl */ +#endif + +#if defined(_WIN32) +typedef ci_uint32 ci_uerr_t; /* range of OS user-mode return codes */ +typedef ci_uint32 ci_kerr_t; /* range of OS kernel-mode return codes */ +#elif defined(__unix__) +typedef ci_int32 ci_uerr_t; /* range of OS user-mode return codes */ +typedef ci_int32 ci_kerr_t; /* range of OS kernel-mode return codes */ +#endif + + +/********************************************************************** + * Compiler and processor dependencies. + */ + +#if defined(__GNUC__) + +#if defined(__i386__) || defined(__x86_64__) +# include +#elif defined(__PPC__) +# include +#elif defined(__ia64__) +# include +#else +# error Unknown processor. +#endif + +#elif defined(_MSC_VER) + +#if defined(__i386__) +# include +# elif defined(__x86_64__) +# include +#else +# error Unknown processor. +#endif + +#elif defined(__PGI) + +# include + +#elif defined(__INTEL_COMPILER) + +/* Intel compilers v7 claim to be very gcc compatible. */ +# include + +#else +# error Unknown compiler. +#endif + + +#endif /* __CI_TOOLS_SYSDEP_H__ */ + +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/tools/debug.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/tools/debug.h @@ -0,0 +1,336 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*! \cidoxg_include_ci_tools */ + +#ifndef __CI_TOOLS_DEBUG_H__ +#define __CI_TOOLS_DEBUG_H__ + +#define CI_LOG_E(x) x /* errors */ +#define CI_LOG_W(x) x /* warnings */ +#define CI_LOG_I(x) x /* information */ +#define CI_LOG_V(x) x /* verbose */ + +/* Build time asserts. We paste the line number into the type name + * so that the macro can be used more than once per file even if the + * compiler objects to multiple identical typedefs. Collisions + * between use in different header files are still possible. 
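For example, CI_BUILD_ASSERT(sizeof(ci_uint32) == 4) expands to a typedef of a char array whose size is 1 if the condition holds and -1 (a compile-time error) if it does not. 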
*/ +#ifndef CI_BUILD_ASSERT +#define __CI_BUILD_ASSERT_NAME(_x) __CI_BUILD_ASSERT_ILOATHECPP(_x) +#define __CI_BUILD_ASSERT_ILOATHECPP(_x) __CI_BUILD_ASSERT__ ##_x +#define CI_BUILD_ASSERT(e)\ + typedef char __CI_BUILD_ASSERT_NAME(__LINE__)[(e)?1:-1] +#endif + + +#ifdef NDEBUG + +# define _ci_check(exp, file, line) +# define _ci_assert2(e, x, y, file, line) +# define _ci_assert(exp, file, line) +# define _ci_assert_equal(exp1, exp2, file, line) +# define _ci_assert_equiv(exp1, exp2, file, line) +# define _ci_assert_nequal(exp1, exp2, file, line) +# define _ci_assert_le(exp1, exp2, file, line) +# define _ci_assert_lt(exp1, exp2, file, line) +# define _ci_assert_ge(exp1, exp2, file, line) +# define _ci_assert_gt(exp1, exp2, file, line) +# define _ci_assert_impl(exp1, exp2, file, line) + +# define _ci_verify(exp, file, line) \ + do { \ + (void)(exp); \ + } while (0) + +# define CI_DEBUG_TRY(exp) \ + do { \ + (void)(exp); \ + } while (0) + +#define CI_TRACE(exp,fmt) +#define CI_TRACE_INT(integer) +#define CI_TRACE_INT32(integer) +#define CI_TRACE_INT64(integer) +#define CI_TRACE_UINT(integer) +#define CI_TRACE_UINT32(integer) +#define CI_TRACE_UINT64(integer) +#define CI_TRACE_HEX(integer) +#define CI_TRACE_HEX32(integer) +#define CI_TRACE_HEX64(integer) +#define CI_TRACE_PTR(pointer) +#define CI_TRACE_STRING(string) +#define CI_TRACE_MAC(mac) +#define CI_TRACE_IP(ip_be32) +#define CI_TRACE_ARP(arp_pkt) + +#else + +# define _CI_ASSERT_FMT "\nfrom %s:%d" + +# define _ci_check(exp, file, line) \ + do { \ + if (CI_UNLIKELY(!(exp))) \ + ci_warn(("ci_check(%s)"_CI_ASSERT_FMT, #exp, \ + (file), (line))); \ + } while (0) + +/* + * NOTE: ci_fail() emits the file and line where the assert is actually + * coded. + */ + +# define _ci_assert(exp, file, line) \ + do { \ + if (CI_UNLIKELY(!(exp))) \ + ci_fail(("ci_assert(%s)"_CI_ASSERT_FMT, #exp, \ + (file), (line))); \ + } while (0) + +# define _ci_assert2(e, x, y, file, line) do { \ + if(CI_UNLIKELY( ! 
(e) )) \ + ci_fail(("ci_assert(%s)\nwhere [%s=%"CI_PRIx64"] " \ + "[%s=%"CI_PRIx64"]\nat %s:%d\nfrom %s:%d", #e \ + , #x, (ci_uint64)(ci_uintptr_t)(x) \ + , #y, (ci_uint64)(ci_uintptr_t)(y), \ + __FILE__, __LINE__, (file), (line))); \ + } while (0) + +# define _ci_verify(exp, file, line) \ + do { \ + if (CI_UNLIKELY(!(exp))) \ + ci_fail(("ci_verify(%s)"_CI_ASSERT_FMT, #exp, \ + (file), (line))); \ + } while (0) + +# define _ci_assert_equal(x, y, f, l) _ci_assert2((x)==(y), x, y, (f), (l)) +# define _ci_assert_nequal(x, y, f, l) _ci_assert2((x)!=(y), x, y, (f), (l)) +# define _ci_assert_le(x, y, f, l) _ci_assert2((x)<=(y), x, y, (f), (l)) +# define _ci_assert_lt(x, y, f, l) _ci_assert2((x)< (y), x, y, (f), (l)) +# define _ci_assert_ge(x, y, f, l) _ci_assert2((x)>=(y), x, y, (f), (l)) +# define _ci_assert_gt(x, y, f, l) _ci_assert2((x)> (y), x, y, (f), (l)) +# define _ci_assert_or(x, y, f, l) _ci_assert2((x)||(y), x, y, (f), (l)) +# define _ci_assert_impl(x, y, f, l) _ci_assert2(!(x) || (y), x, y, (f), (l)) +# define _ci_assert_equiv(x, y, f, l) _ci_assert2(!(x)== !(y), x, y, (f), (l)) + +#define _ci_assert_equal_msg(exp1, exp2, msg, file, line) \ + do { \ + if (CI_UNLIKELY((exp1)!=(exp2))) \ + ci_fail(("ci_assert_equal_msg(%s == %s) were " \ + "(%"CI_PRIx64":%"CI_PRIx64") with msg[%c%c%c%c]" \ + _CI_ASSERT_FMT, #exp1, #exp2, \ + (ci_uint64)(ci_uintptr_t)(exp1), \ + (ci_uint64)(ci_uintptr_t)(exp2), \ + (((ci_uint32)msg) >> 24) & 0xff, \ + (((ci_uint32)msg) >> 16) & 0xff, \ + (((ci_uint32)msg) >> 8 ) & 0xff, \ + (((ci_uint32)msg) ) & 0xff, \ + (file), (line))); \ + } while (0) + +# define CI_DEBUG_TRY(exp) CI_TRY(exp) + +#define CI_TRACE(exp,fmt) \ + ci_log("%s:%d:%s] " #exp "=" fmt, \ + __FILE__, __LINE__, __FUNCTION__, (exp)) + + +#define CI_TRACE_INT(integer) \ + ci_log("%s:%d:%s] " #integer "=%d", \ + __FILE__, __LINE__, __FUNCTION__, (integer)) + + +#define CI_TRACE_INT32(integer) \ + ci_log("%s:%d:%s] " #integer "=%d", \ + __FILE__, __LINE__, __FUNCTION__, ((ci_int32)integer)) + + +#define CI_TRACE_INT64(integer) \ + ci_log("%s:%d:%s] " #integer "=%lld", \ + __FILE__, __LINE__, __FUNCTION__, ((ci_int64)integer)) + + +#define CI_TRACE_UINT(integer) \ + ci_log("%s:%d:%s] " #integer "=%u", \ + __FILE__, __LINE__, __FUNCTION__, (integer)) + + +#define CI_TRACE_UINT32(integer) \ + ci_log("%s:%d:%s] " #integer "=%u", \ + __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer)) + + +#define CI_TRACE_UINT64(integer) \ + ci_log("%s:%d:%s] " #integer "=%llu", \ + __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer)) + + +#define CI_TRACE_HEX(integer) \ + ci_log("%s:%d:%s] " #integer "=0x%x", \ + __FILE__, __LINE__, __FUNCTION__, (integer)) + + +#define CI_TRACE_HEX32(integer) \ + ci_log("%s:%d:%s] " #integer "=0x%x", \ + __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer)) + + +#define CI_TRACE_HEX64(integer) \ + ci_log("%s:%d:%s] " #integer "=0x%llx", \ + __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer)) + + +#define CI_TRACE_PTR(pointer) \ + ci_log("%s:%d:%s] " #pointer "=0x%p", \ + __FILE__, __LINE__, __FUNCTION__, (pointer)) + + +#define CI_TRACE_STRING(string) \ + ci_log("%s:%d:%s] " #string "=%s", \ + __FILE__, __LINE__, __FUNCTION__, (string)) + + +#define CI_TRACE_MAC(mac) \ + ci_log("%s:%d:%s] " #mac "=" CI_MAC_PRINTF_FORMAT, \ + __FILE__, __LINE__, __FUNCTION__, CI_MAC_PRINTF_ARGS(mac)) + + +#define CI_TRACE_IP(ip_be32) \ + ci_log("%s:%d:%s] " #ip_be32 "=" CI_IP_PRINTF_FORMAT, __FILE__, \ + __LINE__, __FUNCTION__, CI_IP_PRINTF_ARGS(&(ip_be32))) + + +#define 
CI_TRACE_ARP(arp_pkt) \ + ci_log("%s:%d:%s]\n"CI_ARP_PRINTF_FORMAT, \ + __FILE__, __LINE__, __FUNCTION__, CI_ARP_PRINTF_ARGS(arp_pkt)) + +#endif /* NDEBUG */ + +#define ci_check(exp) \ + _ci_check(exp, __FILE__, __LINE__) + +#define ci_assert(exp) \ + _ci_assert(exp, __FILE__, __LINE__) + +#define ci_verify(exp) \ + _ci_verify(exp, __FILE__, __LINE__) + +#define ci_assert_equal(exp1, exp2) \ + _ci_assert_equal(exp1, exp2, __FILE__, __LINE__) + +#define ci_assert_equal_msg(exp1, exp2, msg) \ + _ci_assert_equal_msg(exp1, exp2, msg, __FILE__, __LINE__) + +#define ci_assert_nequal(exp1, exp2) \ + _ci_assert_nequal(exp1, exp2, __FILE__, __LINE__) + +#define ci_assert_le(exp1, exp2) \ + _ci_assert_le(exp1, exp2, __FILE__, __LINE__) + +#define ci_assert_lt(exp1, exp2) \ + _ci_assert_lt(exp1, exp2, __FILE__, __LINE__) + +#define ci_assert_ge(exp1, exp2) \ + _ci_assert_ge(exp1, exp2, __FILE__, __LINE__) + +#define ci_assert_gt(exp1, exp2) \ + _ci_assert_gt(exp1, exp2, __FILE__, __LINE__) + +#define ci_assert_impl(exp1, exp2) \ + _ci_assert_impl(exp1, exp2, __FILE__, __LINE__) + +#define ci_assert_equiv(exp1, exp2) \ + _ci_assert_equiv(exp1, exp2, __FILE__, __LINE__) + + +#define CI_TEST(exp) \ + do{ \ + if( CI_UNLIKELY(!(exp)) ) \ + ci_fail(("CI_TEST(%s)", #exp)); \ + }while(0) + + +#define CI_TRY(exp) \ + do{ \ + int _trc; \ + _trc=(exp); \ + if( CI_UNLIKELY(_trc < 0) ) \ + ci_sys_fail(#exp, _trc); \ + }while(0) + + +#define CI_TRY_RET(exp) \ + do{ \ + int _trc; \ + _trc=(exp); \ + if( CI_UNLIKELY(_trc < 0) ) { \ + ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__); \ + return _trc; \ + } \ + }while(0) + +#define CI_LOGLEVEL_TRY_RET(logfn, exp) \ + do{ \ + int _trc; \ + _trc=(exp); \ + if( CI_UNLIKELY(_trc < 0) ) { \ + logfn (ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__)); \ + return _trc; \ + } \ + }while(0) + + +#define CI_SOCK_TRY(exp) \ + do{ \ + ci_sock_err_t _trc; \ + _trc=(exp); \ + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) \ + ci_sys_fail(#exp, _trc.val); \ + }while(0) + + +#define CI_SOCK_TRY_RET(exp) \ + do{ \ + ci_sock_err_t _trc; \ + _trc=(exp); \ + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \ + ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \ + return ci_sock_errcode(_trc); \ + } \ + }while(0) + + +#define CI_SOCK_TRY_SOCK_RET(exp) \ + do{ \ + ci_sock_err_t _trc; \ + _trc=(exp); \ + if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \ + ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \ + return _trc; \ + } \ + }while(0) + +#endif /* __CI_TOOLS_DEBUG_H__ */ + +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h @@ -0,0 +1,362 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + + +/*! \cidoxg_include_ci_tools_platform */ + +#ifndef __CI_TOOLS_LINUX_KERNEL_H__ +#define __CI_TOOLS_LINUX_KERNEL_H__ + +/********************************************************************** + * Need to know the kernel version. + */ + +#ifndef LINUX_VERSION_CODE +# include +# ifndef UTS_RELEASE + /* 2.6.18 onwards defines UTS_RELEASE in a separate header */ +# include +# endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(2,7,0) +# error "Linux 2.6 required" +#endif + + +#include /* kmalloc / kfree */ +#include /* vmalloc / vfree */ +#include /* in_interrupt() */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define ci_in_irq in_irq +#define ci_in_interrupt in_interrupt +#define ci_in_atomic in_atomic + + +/********************************************************************** + * Misc stuff. + */ + +#ifdef BUG +# define CI_BOMB BUG +#endif + +ci_inline void* __ci_alloc(size_t n) +{ return kmalloc(n, (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)); } + +ci_inline void* __ci_atomic_alloc(size_t n) +{ return kmalloc(n, GFP_ATOMIC ); } + +ci_inline void __ci_free(void* p) { return kfree(p); } +ci_inline void* __ci_vmalloc(size_t n) { return vmalloc(n); } +ci_inline void __ci_vfree(void* p) { return vfree(p); } + + +#if CI_MEMLEAK_DEBUG_ALLOC_TABLE + #define ci_alloc(s) ci_alloc_memleak_debug (s, __FILE__, __LINE__) + #define ci_atomic_alloc(s) ci_atomic_alloc_memleak_debug(s, __FILE__, __LINE__) + #define ci_free ci_free_memleak_debug + #define ci_vmalloc(s) ci_vmalloc_memleak_debug (s, __FILE__,__LINE__) + #define ci_vfree ci_vfree_memleak_debug + #define ci_alloc_fn ci_alloc_fn_memleak_debug + #define ci_vmalloc_fn ci_vmalloc_fn_memleak_debug +#else /* !CI_MEMLEAK_DEBUG_ALLOC_TABLE */ + #define ci_alloc_fn __ci_alloc + #define ci_vmalloc_fn __ci_vmalloc +#endif + +#ifndef ci_alloc + #define ci_atomic_alloc __ci_atomic_alloc + #define ci_alloc __ci_alloc + #define ci_free __ci_free + #define ci_vmalloc __ci_vmalloc + #define ci_vmalloc_fn __ci_vmalloc + #define ci_vfree __ci_vfree +#endif + +#define ci_sprintf sprintf +#define ci_vsprintf vsprintf +#define ci_snprintf snprintf +#define ci_vsnprintf vsnprintf +#define ci_sscanf sscanf + + +#define CI_LOG_FN_DEFAULT ci_log_syslog + + +/*-------------------------------------------------------------------- + * + * irqs_disabled - needed for kmap helpers on some kernels + * + *--------------------------------------------------------------------*/ +#ifdef irqs_disabled +# define ci_irqs_disabled irqs_disabled +#else +# if defined(__i386__) | defined(__x86_64__) +# define ci_irqs_disabled(x) \ + ({ \ + unsigned long flags; \ + local_save_flags(flags); \ + !(flags & (1<<9)); \ + }) +# else +# error "Need to implement irqs_disabled() for your architecture" +# endif +#endif + + +/********************************************************************** + * kmap helpers. + * + * Use ci_k(un)map for code paths which are not in an atomic context. + * For atomic code you need to use ci_k(un)map_in_atomic. This will grab + * one of the per-CPU kmap slots. 
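A typical atomic-path use is: p = ci_kmap_in_atomic(pg); ... ; ci_kunmap_in_atomic(pg, p). 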
+ * + * NB in_interrupt != in_irq. If you don't know the difference then + * don't use kmap_in_atomic + * + * 2.4 allocates kmap slots by function. We are going to re-use the + * skb module's slot - we also use the same interlock + * + * 2.6 allocates kmap slots by type as well as by function. We are + * going to use the currently (2.6.10) unused SOFTIRQ slot + * + */ + +ci_inline void* ci_kmap(struct page *page) { + CI_DEBUG(if( ci_in_atomic() | ci_in_interrupt() | ci_in_irq() ) BUG()); + return kmap(page); +} + +ci_inline void ci_kunmap(struct page *page) { + kunmap(page); +} + +#define CI_KM_SLOT KM_SOFTIRQ0 + + +typedef struct semaphore ci_semaphore_t; + +ci_inline void +ci_sem_init (ci_semaphore_t *sem, int val) { + sema_init (sem, val); +} + +ci_inline void +ci_sem_down (ci_semaphore_t *sem) { + down (sem); +} + +ci_inline int +ci_sem_trydown (ci_semaphore_t *sem) { + return down_trylock (sem); +} + +ci_inline void +ci_sem_up (ci_semaphore_t *sem) { + up (sem); +} + +ci_inline int +ci_sem_get_count(ci_semaphore_t *sem) { + return sem->count.counter; +} + +ci_inline void* ci_kmap_in_atomic(struct page *page) +{ + CI_DEBUG(if( ci_in_irq() ) BUG()); + + /* iSCSI can call without in_interrupt() but with irqs_disabled() + and in a context that can't sleep, so we need to check that + too */ + if(ci_in_interrupt() || ci_irqs_disabled()) + return kmap_atomic(page, CI_KM_SLOT); + else + return kmap(page); +} + +ci_inline void ci_kunmap_in_atomic(struct page *page, void* kaddr) +{ + CI_DEBUG(if( ci_in_irq() ) BUG()); + + /* iSCSI can call without in_interrupt() but with irqs_disabled() + and in a context that can't sleep, so we need to check that + too */ + if(ci_in_interrupt() || ci_irqs_disabled()) + kunmap_atomic(kaddr, CI_KM_SLOT); + else + kunmap(page); +} + +/********************************************************************** + * spinlock implementation: used by + */ + +#define CI_HAVE_SPINLOCKS + +typedef ci_uintptr_t ci_lock_holder_t; +#define ci_lock_thisthread (ci_lock_holder_t)current +#define ci_lock_no_holder (ci_lock_holder_t)NULL + +typedef spinlock_t ci_lock_i; +typedef spinlock_t ci_irqlock_i; +typedef unsigned long ci_irqlock_state_t; + +#define IRQLOCK_CYCLES 500000 + +#define ci_lock_ctor_i(l) spin_lock_init(l) +#define ci_lock_dtor_i(l) do{}while(0) +#define ci_lock_lock_i(l) spin_lock(l) +#define ci_lock_trylock_i(l) spin_trylock(l) +#define ci_lock_unlock_i(l) spin_unlock(l) + +#define ci_irqlock_ctor_i(l) spin_lock_init(l) +#define ci_irqlock_dtor_i(l) do{}while(0) +#define ci_irqlock_lock_i(l,s) spin_lock_irqsave(l,*(s)) +#define ci_irqlock_unlock_i(l,s) spin_unlock_irqrestore(l, *(s)) + + +/********************************************************************** + * register access + */ + +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) +typedef volatile void __iomem* ioaddr_t; +#else +typedef unsigned long ioaddr_t; +#endif + + + +/********************************************************************** + * thread implementation -- kernel dependencies probably should be + * moved to driver/linux_kernel.h + */ + +#define ci_linux_daemonize(name) daemonize(name) + +#include + + +typedef struct { + void* (*fn)(void* arg); + void* arg; + const char* name; + int thrd_id; + struct completion exit_event; + struct work_struct keventd_witem; +} ci_kernel_thread_t; + + +typedef ci_kernel_thread_t* cithread_t; + + +extern int cithread_create(cithread_t* tid, void* (*fn)(void*), void* arg, + const char* name); +extern int cithread_detach(cithread_t kt); +extern int 
cithread_join(cithread_t kt); + + +/* Kernel sysctl variables. */ +extern int sysctl_tcp_wmem[3]; +extern int sysctl_tcp_rmem[3]; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) +#define LINUX_HAS_SYSCTL_MEM_MAX +extern ci_uint32 sysctl_wmem_max; +extern ci_uint32 sysctl_rmem_max; +#endif + + +/*-------------------------------------------------------------------- + * + * ci_bigbuf_t: An abstraction of a large buffer. Needed because in the + * Linux kernel, large buffers need to be allocated with vmalloc(), whereas + * smaller buffers should use kmalloc(). This abstraction chooses the + * appropriate mechanism. + * + *--------------------------------------------------------------------*/ + +typedef struct { + char* p; + int is_vmalloc; +} ci_bigbuf_t; + + +ci_inline int ci_bigbuf_alloc(ci_bigbuf_t* bb, size_t bytes) { + if( bytes >= CI_PAGE_SIZE && ! ci_in_atomic() ) { + bb->is_vmalloc = 1; + if( (bb->p = vmalloc(bytes)) ) return 0; + } + bb->is_vmalloc = 0; + bb->p = kmalloc(bytes, ci_in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); + return bb->p ? 0 : -ENOMEM; +} + +ci_inline void ci_bigbuf_free(ci_bigbuf_t* bb) { + if( bb->is_vmalloc ) vfree(bb->p); + else kfree(bb->p); +} + +ci_inline char* ci_bigbuf_ptr(ci_bigbuf_t* bb) +{ return bb->p; } + +/********************************************************************** + * struct iovec abstraction (for Windows port) + */ + +typedef struct iovec ci_iovec; + +/* Accessors for buffer/length */ +#define CI_IOVEC_BASE(i) ((i)->iov_base) +#define CI_IOVEC_LEN(i) ((i)->iov_len) + +/********************************************************************** + * Signals + */ + +ci_inline void +ci_send_sig(int signum) +{ + send_sig(signum, current, 0); +} + +#endif /* __CI_TOOLS_LINUX_KERNEL_H__ */ +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h @@ -0,0 +1,370 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*! \cidoxg_include_ci_tools_platform */ + +#ifndef __CI_TOOLS_GCC_X86_H__ +#define __CI_TOOLS_GCC_X86_H__ + + +/********************************************************************** + * Free-running cycle counters. 
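Both flavours read the x86 timestamp counter with rdtsc: ci_frc32() keeps the low 32 bits, ci_frc64() the full 64-bit count. 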
*/ + +#define CI_HAVE_FRC64 +#define CI_HAVE_FRC32 + +#define ci_frc32(pval) __asm__ __volatile__("rdtsc" : "=a" (*pval) : : "edx") + +#if defined(__x86_64__) +ci_inline void ci_frc64(ci_uint64* pval) { + /* temp fix until we figure how to get this out in one bite */ + ci_uint64 low, high; + __asm__ __volatile__("rdtsc" : "=a" (low) , "=d" (high)); + *pval = (high << 32) | low; +} + +#else +#define ci_frc64(pval) __asm__ __volatile__("rdtsc" : "=A" (*pval)) +#endif + +#define ci_frc_flush() /* ?? Need a pipeline barrier. */ + + +/********************************************************************** + * Atomic integer. + */ + +/* +** int ci_atomic_read(a) { return a->n; } +** void ci_atomic_set(a, v) { a->n = v; } +** void ci_atomic_inc(a) { ++a->n; } +** void ci_atomic_dec(a) { --a->n; } +** int ci_atomic_inc_and_test(a) { return ++a->n == 0; } +** int ci_atomic_dec_and_test(a) { return --a->n == 0; } +** void ci_atomic_and(a, v) { a->n &= v; } +** void ci_atomic_or(a, v) { a->n |= v; } +*/ + +typedef struct { volatile ci_int32 n; } ci_atomic_t; + +#define CI_ATOMIC_INITIALISER(i) {(i)} + +static inline ci_int32 ci_atomic_read(const ci_atomic_t* a) { return a->n; } +static inline void ci_atomic_set(ci_atomic_t* a, int v) { a->n = v; ci_wmb(); } + +static inline void ci_atomic_inc(ci_atomic_t* a) +{ __asm__ __volatile__("lock; incl %0" : "+m" (a->n)); } + + +static inline void ci_atomic_dec(ci_atomic_t* a) +{ __asm__ __volatile__("lock; decl %0" : "+m" (a->n)); } + +static inline int ci_atomic_inc_and_test(ci_atomic_t* a) { + char r; + __asm__ __volatile__("lock; incl %0; sete %1" + : "+m" (a->n), "=qm" (r)); + return r; +} + +static inline int ci_atomic_dec_and_test(ci_atomic_t* a) { + char r; + __asm__ __volatile__("lock; decl %0; sete %1" + : "+m" (a->n), "=qm" (r)); + return r; +} + +ci_inline int +ci_atomic_xadd (ci_atomic_t *a, int v) { + __asm__ ("lock xadd %0, %1" : "=r" (v), "+m" (a->n) : "0" (v)); + return v; +} +ci_inline int +ci_atomic_xchg (ci_atomic_t *a, int v) { + __asm__ ("lock xchg %0, %1" : "=r" (v), "+m" (a->n) : "0" (v)); + return v; +} + +ci_inline void ci_atomic32_or(volatile ci_uint32* p, ci_uint32 mask) +{ __asm__ __volatile__("lock; orl %1, %0" : "+m" (*p) : "ir" (mask)); } + +ci_inline void ci_atomic32_and(volatile ci_uint32* p, ci_uint32 mask) +{ __asm__ __volatile__("lock; andl %1, %0" : "+m" (*p) : "ir" (mask)); } + +ci_inline void ci_atomic32_add(volatile ci_uint32* p, ci_uint32 v) +{ __asm__ __volatile__("lock; addl %1, %0" : "+m" (*p) : "ir" (v)); } + +ci_inline void ci_atomic32_inc(volatile ci_uint32* p) +{ __asm__ __volatile__("lock; incl %0" : "+m" (*p)); } + +ci_inline int ci_atomic32_dec_and_test(volatile ci_uint32* p) { + char r; + __asm__ __volatile__("lock; decl %0; sete %1" : "+m" (*p), "=qm" (r)); + return r; +} + +#define ci_atomic_or(a, v) ci_atomic32_or ((ci_uint32*) &(a)->n, (v)) +#define ci_atomic_and(a, v) ci_atomic32_and((ci_uint32*) &(a)->n, (v)) +#define ci_atomic_add(a, v) ci_atomic32_add((ci_uint32*) &(a)->n, (v)) + +extern int ci_glibc_uses_nptl (void) CI_HF; +extern int ci_glibc_nptl_broken(void) CI_HF; +extern int ci_glibc_gs_get_is_multihreaded_offset (void) CI_HF; +extern int ci_glibc_gs_is_multihreaded_offset CI_HV; + +#if !defined(__x86_64__) +#ifdef __GLIBC__ +/* Returns non-zero if the calling process might be multithreaded, returns 0 if + * it definitely isn't (i.e. if reimplementing this function for other + * architectures and platforms, you can safely just return 1). 
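The glibc probe below runs lazily on the first call and is cached in ci_glibc_gs_is_multihreaded_offset; -2 records that this libc cannot support the trick. 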
+
+extern int ci_glibc_uses_nptl (void) CI_HF;
+extern int ci_glibc_nptl_broken(void) CI_HF;
+extern int ci_glibc_gs_get_is_multihreaded_offset (void) CI_HF;
+extern int ci_glibc_gs_is_multihreaded_offset CI_HV;
+
+#if !defined(__x86_64__)
+#ifdef __GLIBC__
+/* Returns non-zero if the calling process might be multithreaded, returns 0
+ * if it definitely isn't (i.e. if reimplementing this function for other
+ * architectures and platforms, you can safely just return 1).
+ */
+static inline int ci_is_multithreaded (void) {
+
+  while (1) {
+    if (ci_glibc_gs_is_multihreaded_offset >= 0) {
+      /* NPTL keeps a variable that tells us this hanging off gs (i.e. in
+       * thread-local storage); just return this
+       */
+      int r;
+      __asm__ __volatile__ ("movl %%gs:(%1), %0"
+                            : "=r" (r)
+                            : "r" (ci_glibc_gs_is_multihreaded_offset));
+      return r;
+    }
+
+    if (ci_glibc_gs_is_multihreaded_offset == -2) {
+      /* This means we've already determined that the libc version is NOT
+       * good for our funky "is multithreaded" hack
+       */
+      return 1;
+    }
+
+    /* If we get here, it means this is the first time the function has been
+     * called -- detect the libc version and go around again.
+     */
+    ci_glibc_gs_is_multihreaded_offset =
+      ci_glibc_gs_get_is_multihreaded_offset ();
+
+    /* Go around again.  We do the test here rather than at the top so that
+     * we go quicker in the common case
+     */
+  }
+}
+
+#else    /* def __GLIBC__ */
+
+#define ci_is_multithreaded() 1  /* ?? Is this the POSIX way of finding  */
+                                 /*    out whether the application is    */
+                                 /*    single-threaded?                  */
+
+#endif   /* def __GLIBC__ */
+
+#else    /* defined __x86_64__ */
+
+static inline int ci_is_multithreaded (void) {
+  /* No easy way to tell on x86_64; so assume we're multithreaded */
+  return 1;
+}
+
+#endif   /* defined __x86_64__ */
+
+
+/**********************************************************************
+ * Compare and swap.
+ */
+
+#define CI_HAVE_COMPARE_AND_SWAP
+
+ci_inline int ci_cas32_succeed(volatile ci_int32* p, ci_int32 oldval,
+                               ci_int32 newval) {
+  char ret;
+  ci_int32 prevval;
+  __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
+                       : "=q"(ret), "+m"(*p), "=a"(prevval)
+                       : "r"(newval), "a"(oldval));
+  return ret;
+}
+
+ci_inline int ci_cas32_fail(volatile ci_int32* p, ci_int32 oldval,
+                            ci_int32 newval) {
+  char ret;
+  ci_int32 prevval;
+  __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
+                       : "=q"(ret), "+m"(*p), "=a"(prevval)
+                       : "r"(newval), "a"(oldval));
+  return ret;
+}
+
+#ifdef __x86_64__
+ci_inline int ci_cas64_succeed(volatile ci_int64* p, ci_int64 oldval,
+                               ci_int64 newval) {
+  char ret;
+  ci_int64 prevval;
+  __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
+                       : "=q"(ret), "+m"(*p), "=a"(prevval)
+                       : "r"(newval), "a"(oldval));
+  return ret;
+}
+
+ci_inline int ci_cas64_fail(volatile ci_int64* p, ci_int64 oldval,
+                            ci_int64 newval) {
+  char ret;
+  ci_int64 prevval;
+  __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
+                       : "=q"(ret), "+m"(*p), "=a"(prevval)
+                       : "r"(newval), "a"(oldval));
+  return ret;
+}
+#endif
+
+ci_inline int ci_cas32u_succeed(volatile ci_uint32* p, ci_uint32 oldval,
+                                ci_uint32 newval) {
+  char ret;
+  ci_uint32 prevval;
+  __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
+                       : "=q"(ret), "+m"(*p), "=a"(prevval)
+                       : "r"(newval), "a"(oldval));
+  return ret;
+}
+
+ci_inline int ci_cas32u_fail(volatile ci_uint32* p, ci_uint32 oldval,
+                             ci_uint32 newval) {
+  char ret;
+  ci_uint32 prevval;
+  __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
+                       : "=q"(ret), "+m"(*p), "=a"(prevval)
+                       : "r"(newval), "a"(oldval));
+  return ret;
+}
+
+ci_inline int ci_cas64u_succeed(volatile ci_uint64* p, ci_uint64 oldval,
+                                ci_uint64 newval) {
+  char ret;
+  ci_uint64 prevval;
+  __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
+                       : "=q"(ret), "+m"(*p), "=a"(prevval)
+                       : "r"(newval), "a"(oldval));
+  return ret;
+}
+
+ci_inline int ci_cas64u_fail(volatile ci_uint64* p, ci_uint64 oldval,
+                             ci_uint64 newval) {
+  char ret;
+  ci_uint64 prevval;
+  __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
+                       :
"=q"(ret), "+m"(*p), "=a"(prevval) + : "r"(newval), "a"(oldval)); + return ret; +} + +#ifdef __x86_64__ + +# define ci_cas_uintptr_succeed(p,o,n) \ + ci_cas64u_succeed((volatile ci_uint64*) (p), (o), (n)) +# define ci_cas_uintptr_fail(p,o,n) \ + ci_cas64u_fail((volatile ci_uint64*) (p), (o), (n)) + +#else + +# define ci_cas_uintptr_succeed(p,o,n) \ + ci_cas32u_succeed((volatile ci_uint32*) (p), (o), (n)) +# define ci_cas_uintptr_fail(p,o,n) \ + ci_cas32u_fail((volatile ci_uint32*) (p), (o), (n)) + +#endif + + +/********************************************************************** + * Atomic bit field. + */ + +typedef ci_uint32 ci_bits; +#define CI_BITS_N 32u + +#define CI_BITS_DECLARE(name, n) \ + ci_bits name[((n) + CI_BITS_N - 1u) / CI_BITS_N] + +ci_inline void ci_bits_clear_all(volatile ci_bits* b, int n_bits) +{ memset((void*) b, 0, (n_bits+CI_BITS_N-1u) / CI_BITS_N * sizeof(ci_bits)); } + +ci_inline void ci_bit_set(volatile ci_bits* b, int i) { + __asm__ __volatile__("lock; btsl %1, %0" + : "=m" (*b) + : "Ir" (i)); +} + +ci_inline void ci_bit_clear(volatile ci_bits* b, int i) { + __asm__ __volatile__("lock; btrl %1, %0" + : "=m" (*b) + : "Ir" (i)); +} + +ci_inline int ci_bit_test(volatile ci_bits* b, int i) { + char rc; + __asm__("btl %2, %1; setc %0" + : "=r" (rc) + : "m" (*b), "Ir" (i)); + return rc; +} + +ci_inline int ci_bit_test_and_set(volatile ci_bits* b, int i) { + char rc; + __asm__ __volatile__("lock; btsl %2, %1; setc %0" + : "=r" (rc), "+m" (*b) + : "Ir" (i)); + return rc; +} + +ci_inline int ci_bit_test_and_clear(volatile ci_bits* b, int i) { + char rc; + __asm__ __volatile__("lock; btrl %2, %1; setc %0" + : "=r" (rc), "+m" (*b) + : "Ir" (i)); + return rc; +} + +/* These mask ops only work within a single ci_bits word. */ +#define ci_bit_mask_set(b,m) ci_atomic32_or((b), (m)) +#define ci_bit_mask_clear(b,m) ci_atomic32_and((b), ~(m)) + + +/********************************************************************** + * Misc. + */ + +#if __GNUC__ >= 3 +# define ci_spinloop_pause() __asm__("pause") +#else +# define ci_spinloop_pause() __asm__(".byte 0xf3, 0x90") +#endif + + +#define CI_HAVE_ADDC32 +#define ci_add_carry32(sum, v) __asm__("addl %1, %0 ;" \ + "adcl $0, %0 ;" \ + : "=r" (sum) \ + : "g" ((ci_uint32) v), "0" (sum)) + + +#endif /* __CI_TOOLS_GCC_X86_H__ */ + +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat/x86.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat/x86.h @@ -0,0 +1,48 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*! \cidoxg_include_ci_compat */ + +#ifndef __CI_COMPAT_X86_H__ +#define __CI_COMPAT_X86_H__ + + +#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN + +#define CI_WORD_SIZE 4 +#define CI_PTR_SIZE 4 + +#define CI_PAGE_SIZE 4096 +#define CI_PAGE_SHIFT 12 +#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1)) + +#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */ +#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */ +#define CI_CPU_OOS 0 /* CPU does out of order stores */ + + +#endif /* __CI_COMPAT_X86_H__ */ + +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat/utils.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat/utils.h @@ -0,0 +1,269 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr + * \brief Handy utility macros. + * \date 2003/01/17 + */ + +/*! \cidoxg_include_ci_compat */ + +#ifndef __CI_COMPAT_UTILS_H__ +#define __CI_COMPAT_UTILS_H__ + + +/********************************************************************** + * Alignment -- [align] must be a power of 2. + **********************************************************************/ + + /*! Align forward onto next boundary. */ + +#define CI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u)) + + + /*! Align back onto prev boundary. */ + +#define CI_ALIGN_BACK(p, align) ((p) & ~((align)-1u)) + + + /*! How far to next boundary? */ + +#define CI_ALIGN_NEEDED(p, align, signed_t) (-(signed_t)(p) & ((align)-1u)) + + + /*! How far beyond prev boundary? */ + +#define CI_OFFSET(p, align) ((p) & ((align)-1u)) + + + /*! Does object fit in gap before next boundary? */ + +#define CI_FITS(p, size, align, signed_t) \ + (CI_ALIGN_NEEDED((p) + 1, (align), signed_t) + 1 >= (size)) + + + /*! Align forward onto next boundary. */ + +#define CI_PTR_ALIGN_FWD(p, align) \ + ((char*) CI_ALIGN_FWD(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))) + + /*! Align back onto prev boundary. */ + +#define CI_PTR_ALIGN_BACK(p, align) \ + ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))) + + /*! How far to next boundary? */ + +#define CI_PTR_ALIGN_NEEDED(p, align) \ + CI_ALIGN_NEEDED(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)), \ + ci_ptr_arith_t) + + /*! How far to next boundary? 
NZ = not zero i.e. give align if on boundary */ + +#define CI_PTR_ALIGN_NEEDED_NZ(p, align) \ + ((align) - (((char*)p) - \ + ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))))) + + /*! How far beyond prev boundary? */ + +#define CI_PTR_OFFSET(p, align) \ + CI_OFFSET(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))) + + + /* Same as CI_ALIGN_FWD and CI_ALIGN_BACK. */ + +#define CI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u)) + +#define CI_ROUND_DOWN(i, align) ((i) & ~((align)-1u)) + + +/********************************************************************** + * Byte-order + **********************************************************************/ + +/* These are not flags. They are enumeration values for use with + * CI_MY_BYTE_ORDER. */ +#define CI_BIG_ENDIAN 1 +#define CI_LITTLE_ENDIAN 0 + +/* +** Note that these byte-swapping primitives may leave junk in bits above +** the range they operate on. +** +** The CI_BSWAP_nn() routines require that bits above [nn] are zero. Use +** CI_BSWAPM_nn(x) if this cannot be guaranteed. +*/ + +/* ?? May be able to improve on some of these with inline assembler on some +** platforms. +*/ + +#define CI_BSWAP_16(v) ((((v) & 0xff) << 8) | ((v) >> 8)) +#define CI_BSWAPM_16(v) ((((v) & 0xff) << 8) | (((v) & 0xff00) >> 8)) + +#define CI_BSWAP_32(v) (((v) >> 24) | \ + (((v) & 0x00ff0000) >> 8) | \ + (((v) & 0x0000ff00) << 8) | \ + ((v) << 24)) +#define CI_BSWAPM_32(v) ((((v) & 0xff000000) >> 24) | \ + (((v) & 0x00ff0000) >> 8) | \ + (((v) & 0x0000ff00) << 8) | \ + ((v) << 24)) + +#define CI_BSWAP_64(v) (((v) >> 56) | \ + (((v) & 0x00ff000000000000) >> 40) | \ + (((v) & 0x0000ff0000000000) >> 24) | \ + (((v) & 0x000000ff00000000) >> 8) | \ + (((v) & 0x00000000ff000000) << 8) | \ + (((v) & 0x0000000000ff0000) << 24) | \ + (((v) & 0x000000000000ff00) << 40) | \ + ((v) << 56)) + +# define CI_BSWAPPED_16_IF(c,v) ((c) ? CI_BSWAP_16(v) : (v)) +# define CI_BSWAPPED_32_IF(c,v) ((c) ? CI_BSWAP_32(v) : (v)) +# define CI_BSWAPPED_64_IF(c,v) ((c) ? CI_BSWAP_64(v) : (v)) +# define CI_BSWAP_16_IF(c,v) do{ if((c)) (v) = CI_BSWAP_16(v); }while(0) +# define CI_BSWAP_32_IF(c,v) do{ if((c)) (v) = CI_BSWAP_32(v); }while(0) +# define CI_BSWAP_64_IF(c,v) do{ if((c)) (v) = CI_BSWAP_64(v); }while(0) + +#if (CI_MY_BYTE_ORDER == CI_LITTLE_ENDIAN) +# define CI_BSWAP_LE16(v) (v) +# define CI_BSWAP_LE32(v) (v) +# define CI_BSWAP_LE64(v) (v) +# define CI_BSWAP_BE16(v) CI_BSWAP_16(v) +# define CI_BSWAP_BE32(v) CI_BSWAP_32(v) +# define CI_BSWAP_BE64(v) CI_BSWAP_64(v) +# define CI_BSWAPM_LE16(v) (v) +# define CI_BSWAPM_LE32(v) (v) +# define CI_BSWAPM_LE64(v) (v) +# define CI_BSWAPM_BE16(v) CI_BSWAPM_16(v) +# define CI_BSWAPM_BE32(v) CI_BSWAPM_32(v) +#elif (CI_MY_BYTE_ORDER == CI_BIG_ENDIAN) +# define CI_BSWAP_BE16(v) (v) +# define CI_BSWAP_BE32(v) (v) +# define CI_BSWAP_BE64(v) (v) +# define CI_BSWAP_LE16(v) CI_BSWAP_16(v) +# define CI_BSWAP_LE32(v) CI_BSWAP_32(v) +# define CI_BSWAP_LE64(v) CI_BSWAP_64(v) +# define CI_BSWAPM_BE16(v) (v) +# define CI_BSWAPM_BE32(v) (v) +# define CI_BSWAPM_BE64(v) (v) +# define CI_BSWAPM_LE16(v) CI_BSWAPM_16(v) +# define CI_BSWAPM_LE32(v) CI_BSWAPM_32(v) +#else +# error Bad endian. 
+#endif + + +/********************************************************************** + * Get pointer to struct from pointer to member + **********************************************************************/ + +#define CI_MEMBER_OFFSET(c_type, mbr_name) \ + ((ci_uint32) (ci_uintptr_t)(&((c_type*)0)->mbr_name)) + +#define CI_MEMBER_SIZE(c_type, mbr_name) \ + sizeof(((c_type*)0)->mbr_name) + +#define __CI_CONTAINER(c_type, mbr_name, p_mbr) \ + ( (c_type*) ((char*)(p_mbr) - CI_MEMBER_OFFSET(c_type, mbr_name)) ) + +#ifndef CI_CONTAINER +# define CI_CONTAINER(t,m,p) __CI_CONTAINER(t,m,p) +#endif + + +/********************************************************************** + * Structure member initialiser. + **********************************************************************/ + +#ifndef CI_STRUCT_MBR +# define CI_STRUCT_MBR(name, val) .name = val +#endif + + +/********************************************************************** + * min / max + **********************************************************************/ + +#define CI_MIN(x,y) (((x) < (y)) ? (x) : (y)) +#define CI_MAX(x,y) (((x) > (y)) ? (x) : (y)) + +/********************************************************************** + * abs + **********************************************************************/ + +#define CI_ABS(x) (((x) < 0) ? -(x) : (x)) + +/********************************************************************** + * Conditional debugging + **********************************************************************/ + +#ifdef NDEBUG +# define CI_DEBUG(x) +# define CI_NDEBUG(x) x +# define CI_IF_DEBUG(y,n) (n) +# define CI_DEBUG_ARG(x) +#else +# define CI_DEBUG(x) x +# define CI_NDEBUG(x) +# define CI_IF_DEBUG(y,n) (y) +# define CI_DEBUG_ARG(x) ,x +#endif + +#ifdef __KERNEL__ +#define CI_KERNEL_ARG(x) ,x +#else +#define CI_KERNEL_ARG(x) +#endif + +#ifdef _WIN32 +# define CI_KERNEL_ARG_WIN(x) CI_KERNEL_ARG(x) +# define CI_ARG_WIN(x) ,x +#else +# define CI_KERNEL_ARG_WIN(x) +# define CI_ARG_WIN(x) +#endif + +#ifdef __unix__ +# define CI_KERNEL_ARG_UNIX(x) CI_KERNEL_ARG(x) +# define CI_ARG_UNIX(x) ,x +#else +# define CI_KERNEL_ARG_UNIX(x) +# define CI_ARG_UNIX(x) +#endif + +#ifdef __linux__ +# define CI_KERNEL_ARG_LINUX(x) CI_KERNEL_ARG(x) +# define CI_ARG_LINUX(x) ,x +#else +# define CI_KERNEL_ARG_LINUX(x) +# define CI_ARG_LINUX(x) +#endif + + +#endif /* __CI_COMPAT_UTILS_H__ */ +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat/primitive.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat/primitive.h @@ -0,0 +1,77 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+/*! \cidoxg_include_ci_compat */
+
+#ifndef __CI_COMPAT_PRIMITIVE_H__
+#define __CI_COMPAT_PRIMITIVE_H__
+
+
+/**********************************************************************
+ * Primitive types.
+ */
+
+typedef unsigned char       ci_uint8;
+typedef char                ci_int8;
+
+typedef unsigned short      ci_uint16;
+typedef short               ci_int16;
+
+typedef unsigned int        ci_uint32;
+typedef int                 ci_int32;
+
+/* 64-bit support is platform dependent. */
+
+
+/**********************************************************************
+ * Other fancy types.
+ */
+
+typedef ci_uint8            ci_octet;
+
+typedef enum {
+  CI_FALSE = 0,
+  CI_TRUE
+} ci_boolean_t;
+
+
+/**********************************************************************
+ * Some nice types you'd always assumed were standards.
+ * (Really, they are SYSV "standards".)
+ */
+
+#ifdef _WIN32
+typedef unsigned long       ulong;
+typedef unsigned int        uint;
+typedef char*               caddr_t;
+#elif defined(__linux__) && defined(__KERNEL__)
+#include <linux/types.h>
+#elif defined(__linux__)
+#include <sys/types.h>
+#endif
+
+
+#endif  /* __CI_COMPAT_PRIMITIVE_H__ */
+
+/*! \cidoxg_end */
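
The alignment, byte-swap and container macros from ci/compat/utils.h above compose as follows; a hedged sketch (hypothetical example_ names; ci_uint32 comes from primitive.h, CI_CONTAINER from utils.h or the type-checked gcc.h variant):

struct example_msg {
  ci_uint32 seq;
  ci_uint32 len_be32;
};

static void example_utils(struct example_msg* m, ci_uint32 host_len)
{
  unsigned aligned;
  struct example_msg* back;

  aligned = CI_ALIGN_FWD(13u, 8u);        /* rounds 13 up to 16 */
  m->len_be32 = CI_BSWAP_BE32(host_len);  /* host order to big-endian */
  /* Recover the enclosing structure from a pointer to its member. */
  back = CI_CONTAINER(struct example_msg, len_be32, &m->len_be32);
  (void) aligned;  (void) back;           /* back == m */
}
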
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat/x86_64.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat/x86_64.h
@@ -0,0 +1,54 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  djr
+ * \brief   Arch stuff for AMD x86_64.
+ *   \date  2004/08/17
+ */
+
+/*! \cidoxg_include_ci_compat */
+#ifndef __CI_COMPAT_X86_64_H__
+#define __CI_COMPAT_X86_64_H__
+
+
+#define CI_MY_BYTE_ORDER   CI_LITTLE_ENDIAN
+
+#define CI_WORD_SIZE       8
+#define CI_PTR_SIZE        8
+
+#define CI_PAGE_SIZE       4096
+#define CI_PAGE_SHIFT      12
+#define CI_PAGE_MASK       (~(CI_PAGE_SIZE - 1))
+
+#define CI_CPU_HAS_SSE     1    /* SSE extensions supported */
+
+/* SSE2 disabled while investigating BUG1060 */
+#define CI_CPU_HAS_SSE2    0    /* SSE2 extensions supported */
+#define CI_CPU_OOS         0    /* CPU does out of order stores */
+
+
+#endif  /* __CI_COMPAT_X86_64_H__ */
+/*! \cidoxg_end */
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat/gcc_x86.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat/gcc_x86.h
@@ -0,0 +1,115 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+/*! \cidoxg_include_ci_compat  */
+
+#ifndef __CI_COMPAT_GCC_X86_H__
+#define __CI_COMPAT_GCC_X86_H__
+
+/*
+** The facts:
+**
+**   SSE   sfence
+**   SSE2  lfence, mfence, pause
+*/
+
+/*
+  Barriers to enforce ordering with respect to:
+
+  normal memory use:  ci_wmb, ci_rmb, ci_mb
+  IO bus access use:  ci_wiob, ci_riob, ci_iob
+*/
+#if defined(__x86_64__)
+# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
+#else
+# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#endif
+
+/* ?? measure the impact of latency of sfence on a modern processor before we
+   take a decision on how to integrate with respect to writecombining */
+
+/* DJR: I don't think we need to add "memory" here.  It means the asm does
+** something to memory that GCC doesn't understand.  But all this does is
+** commit changes that GCC thinks have already happened.  NB. GCC will not
+** reorder across a __volatile__ __asm__ anyway.
+*/
+#define ci_gcc_fence()    __asm__ __volatile__ ("")
+
+#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+# define ci_x86_sfence()  __asm__ __volatile__ ("sfence")
+# define ci_x86_lfence()  __asm__ __volatile__ ("lfence")
+# define ci_x86_mfence()  __asm__ __volatile__ ("mfence")
+#else
+# define ci_x86_sfence()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
+# define ci_x86_lfence()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xE8")
+# define ci_x86_mfence()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF0")
+#endif
+
+
+/* x86 processors up to the P4 Xeon store in-order unless executing streaming
+   extensions or when using writecombining.
+
+   Hence we do not define ci_wmb to use sfence by default.  The requirement
+   is that we do not use writecombining to memory, and any code which uses
+   SSE extensions must call sfence directly.
+
+   We need to track non-Intel clones which may support out-of-order stores.
+ +*/ + +#if CI_CPU_OOS +# if CI_CPU_HAS_SSE +# define ci_wmb() ci_x86_sfence() +# else +# define ci_wmb() ci_x86_mb() +# endif +#else +# define ci_wmb() ci_gcc_fence() +#endif + +#if CI_CPU_HAS_SSE2 +# define ci_rmb() ci_x86_lfence() +# define ci_mb() ci_x86_mfence() +# define ci_riob() ci_x86_lfence() +# define ci_wiob() ci_x86_sfence() +# define ci_iob() ci_x86_mfence() +#else +# if CI_CPU_HAS_SSE +# define ci_wiob() ci_x86_sfence() +# else +# define ci_wiob() ci_x86_mb() +# endif +# define ci_rmb() ci_x86_mb() +# define ci_mb() ci_x86_mb() +# define ci_riob() ci_x86_mb() +# define ci_iob() ci_x86_mb() +#endif + +typedef unsigned long ci_phys_addr_t; +#define ci_phys_addr_fmt "%lx" + +#endif /* __CI_COMPAT_GCC_X86_H__ */ + +/*! \cidoxg_end */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat/gcc.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat/gcc.h @@ -0,0 +1,158 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/*! 
\cidoxg_include_ci_compat  */
+
+#ifndef __CI_COMPAT_GCC_H__
+#define __CI_COMPAT_GCC_H__
+
+
+#define CI_HAVE_INT64
+
+
+#if defined(__linux__) && defined(__KERNEL__)
+
+# include <linux/types.h>
+
+typedef __u64                 ci_uint64;
+typedef __s64                 ci_int64;
+# if BITS_PER_LONG == 32
+typedef __s32                 ci_ptr_arith_t;
+typedef __u32                 ci_uintptr_t;
+# else
+typedef __s64                 ci_ptr_arith_t;
+typedef __u64                 ci_uintptr_t;
+# endif
+
+
+/* it's not obvious to me why the below is wrong for x86_64, but
+ * gcc seems to complain on this platform
+ */
+# if defined(__ia64__)
+#  define CI_PRId64            "ld"
+#  define CI_PRIi64            "li"
+#  define CI_PRIo64            "lo"
+#  define CI_PRIu64            "lu"
+#  define CI_PRIx64            "lx"
+#  define CI_PRIX64            "lX"
+# else
+#  define CI_PRId64            "lld"
+#  define CI_PRIi64            "lli"
+#  define CI_PRIo64            "llo"
+#  define CI_PRIu64            "llu"
+#  define CI_PRIx64            "llx"
+#  define CI_PRIX64            "llX"
+# endif
+
+# define CI_PRId32            "d"
+# define CI_PRIi32            "i"
+# define CI_PRIo32            "o"
+# define CI_PRIu32            "u"
+# define CI_PRIx32            "x"
+# define CI_PRIX32            "X"
+
+#else
+
+# include <stdint.h>
+# include <inttypes.h>
+
+typedef uint64_t              ci_uint64;
+typedef int64_t               ci_int64;
+typedef intptr_t              ci_ptr_arith_t;
+typedef uintptr_t             ci_uintptr_t;
+
+# define CI_PRId64            PRId64
+# define CI_PRIi64            PRIi64
+# define CI_PRIo64            PRIo64
+# define CI_PRIu64            PRIu64
+# define CI_PRIx64            PRIx64
+# define CI_PRIX64            PRIX64
+
+# define CI_PRId32            PRId32
+# define CI_PRIi32            PRIi32
+# define CI_PRIo32            PRIo32
+# define CI_PRIu32            PRIu32
+# define CI_PRIx32            PRIx32
+# define CI_PRIX32            PRIX32
+
+#endif
+
+
+typedef ci_uint64                       ci_fixed_descriptor_t;
+
+#define from_fixed_descriptor(desc) ((ci_uintptr_t)(desc))
+#define to_fixed_descriptor(desc) ((ci_fixed_descriptor_t)(ci_uintptr_t)(desc))
+
+
+#if __GNUC__ >= 3 && !defined(__cplusplus)
+/*
+** Checks that [p_mbr] has the same type as [&c_type::mbr_name].
+*/
+# define CI_CONTAINER(c_type, mbr_name, p_mbr)				\
+   __builtin_choose_expr(						\
+     __builtin_types_compatible_p(__typeof__(&((c_type*)0)->mbr_name),	\
+				  __typeof__(p_mbr)),			\
+     __CI_CONTAINER(c_type, mbr_name, p_mbr), (void)0)
+
+# define ci_restrict  __restrict__
+#endif
+
+
+#if !defined(__KERNEL__) || defined(__unix__)
+#define CI_HAVE_NPRINTF  1
+#endif
+
+
+/* At what version was this introduced? */
+#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
+# define CI_LIKELY(t)    __builtin_expect((t), 1)
+# define CI_UNLIKELY(t)  __builtin_expect((t), 0)
+#endif
+
+/**********************************************************************
+ * Attributes
+ */
+#if __GNUC__ >= 3 && defined(NDEBUG)
+# define CI_HF __attribute__((visibility("hidden")))
+# define CI_HV __attribute__((visibility("hidden")))
+#else
+# define CI_HF
+# define CI_HV
+#endif
+
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+# define ci_noinline  static __attribute__((__noinline__))
+/* (Linux 2.6 defines its own "noinline", so we use the "__noinline__" form) */
+#else
+# define ci_noinline  static
+#endif
+
+#define CI_ALIGN(x) __attribute__ ((aligned (x)))
+
+#define CI_PRINTF_LIKE(a,b) __attribute__((format(printf,a,b)))
+
+#endif  /* __CI_COMPAT_GCC_H__ */
+
+/*! \cidoxg_end */
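
A short sketch of the portability helpers above (hypothetical example_ name; assumes kernel context, so printk() is available): CI_PRIx64 expands to the correct printf length modifier for ci_uint64 on each platform, and CI_UNLIKELY() marks the error path as cold.

static void example_report(ci_uint64 cookie, int rc)
{
  if( CI_UNLIKELY(rc < 0) )
    printk("cookie %"CI_PRIx64" failed: rc=%d\n", cookie, rc);
}
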
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/compat/sysdep.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/compat/sysdep.h
@@ -0,0 +1,166 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+/*! \cidoxg_include_ci_compat */
+
+#ifndef __CI_COMPAT_SYSDEP_H__
+#define __CI_COMPAT_SYSDEP_H__
+
+
+/**********************************************************************
+ * Platform definition fixups.
+ */
+
+#if defined(__ci_ul_driver__) && !defined(__ci_driver__)
+# define __ci_driver__
+#endif
+
+#if defined(__ci_driver__) && !defined(__ci_ul_driver__) && \
+    !defined(__KERNEL__)
+# define __KERNEL__
+#endif
+
+
+/**********************************************************************
+ * Sanity checks (no cheating!)
+ */
+
+#if defined(__KERNEL__) && !defined(__ci_driver__)
+# error Insane.
+#endif
+
+#if defined(__KERNEL__) && defined(__ci_ul_driver__)
+# error Madness.
+#endif
+
+#if defined(__unix__) && defined(_WIN32)
+# error Strange.
+#endif
+
+#if defined(__GNUC__) && defined(_MSC_VER)
+# error Crazy.
+#endif
+
+
+/**********************************************************************
+ * Compiler and processor dependencies.
+ */
+
+#if defined(__GNUC__)
+
+# include <ci/compat/gcc.h>
+
+# if defined(__i386__)
+#  include <ci/compat/x86.h>
+#  include <ci/compat/gcc_x86.h>
+# elif defined(__x86_64__)
+#  include <ci/compat/x86_64.h>
+#  include <ci/compat/gcc_x86.h>
+# elif defined(__PPC__)
+#  include <ci/compat/ppc.h>
+#  include <ci/compat/gcc_ppc.h>
+# elif defined(__ia64__)
+#  include <ci/compat/ia64.h>
+#  include <ci/compat/gcc_ia64.h>
+# else
+#  error Unknown processor - GNU C
+# endif
+
+#elif defined(_MSC_VER)
+
+# include <ci/compat/msvc.h>
+
+# if defined(__i386__)
+#  include <ci/compat/x86.h>
+#  include <ci/compat/msvc_x86.h>
+# elif defined(__x86_64__)
+#  include <ci/compat/x86_64.h>
+#  include <ci/compat/msvc_x86_64.h>
+# else
+#  error Unknown processor MSC
+# endif
+
+#elif defined(__PGI)
+
+# include <ci/compat/x86.h>
+# include <ci/compat/pg_x86.h>
+
+#elif defined(__INTEL_COMPILER)
+
+/* Intel compilers v7 claim to be very gcc compatible. */
+# if __INTEL_COMPILER >= 700
+#  include <ci/compat/gcc.h>
+#  include <ci/compat/x86.h>
+#  include <ci/compat/gcc_x86.h>
+# else
+#  error Old Intel compiler not supported.  Yet.
+# endif
+
+#else
+# error Unknown compiler.
+#endif
+
+
+/**********************************************************************
+ * Misc stuff (that probably shouldn't be here).
+ */
+
+#ifdef __sun
+# ifdef __KERNEL__
+#  define _KERNEL
+#  define _SYSCALL32
+#  ifdef _LP64
+#   define _SYSCALL32_IMPL
+#  endif
+# else
+#  define _REENTRANT
+# endif
+#endif
+
+
+/**********************************************************************
+ * Defaults for anything left undefined.
+ */
+
+#ifndef  CI_LIKELY
+# define CI_LIKELY(t)    (t)
+# define CI_UNLIKELY(t)  (t)
+#endif
+
+#ifndef  ci_restrict
+# define ci_restrict
+#endif
+
+#ifndef  ci_inline
+# define ci_inline  static inline
+#endif
+
+#ifndef  ci_noinline
+# define ci_noinline  static
+#endif
+
+#endif  /* __CI_COMPAT_SYSDEP_H__ */
+
+/*! \cidoxg_end */
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/public.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/public.h
@@ -0,0 +1,104 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides the public API of the efhw library exported from the
+ * SFC resource driver.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_PUBLIC_H__
+#define __CI_EFHW_PUBLIC_H__
+
+#include <ci/efhw/common.h>
+#include <ci/efhw/efhw_types.h>
+
+/*! Returns true if we have some EtherFabric functional units -
+  whether configured or not */
+static inline int efhw_nic_have_functional_units(struct efhw_nic *nic)
+{
+	return nic->efhw_func != 0;
+}
+
+/*! Returns true if the EtherFabric functional units have been configured */
+static inline int efhw_nic_have_hw(struct efhw_nic *nic)
+{
+	return efhw_nic_have_functional_units(nic) && (EFHW_KVA(nic) != 0);
+}
+
+/*! Helper function to allocate the iobuffer needed by an eventq
+ *   - it ensures the eventq has the correct alignment for the NIC
+ *
+ * \param rm        Event-queue resource manager
+ * \param instance  Event-queue instance (index)
+ * \param buf_bytes Requested size of eventq
+ * \return          < 0 if iobuffer allocation fails
+ */
+int efhw_nic_event_queue_alloc_iobuffer(struct efhw_nic *nic,
+					struct eventq_resource_hardware *h,
+					int evq_instance, unsigned buf_bytes);
+
+extern void falcon_nic_set_rx_usr_buf_size(struct efhw_nic *,
+					    int rx_usr_buf_size);
+
+/*! Get RX filter search limits from RX_FILTER_CTL_REG.
+ *  use_raw_values = 0 to get actual depth of search, or 1 to get raw values
+ *  from the register.
+ */
+extern void
+falcon_nic_get_rx_filter_search_limits(struct efhw_nic *nic,
+				       struct efhw_filter_search_limits *lim,
+				       int use_raw_values);
+
+/*! Set RX filter search limits in RX_FILTER_CTL_REG.
+ *  use_raw_values = 0 if specifying actual depth of search, or 1 if
+ *  specifying raw values to write to the register.
+ */
+extern void
+falcon_nic_set_rx_filter_search_limits(struct efhw_nic *nic,
+				       struct efhw_filter_search_limits *lim,
+				       int use_raw_values);
+
+
+/*! Legacy RX IP filter search depth control interface */
+extern void
+falcon_nic_rx_filter_ctl_set(struct efhw_nic *nic, uint32_t tcp_full,
+			     uint32_t tcp_wild,
+			     uint32_t udp_full, uint32_t udp_wild);
+
+/*! Legacy RX IP filter search depth control interface */
+extern void
+falcon_nic_rx_filter_ctl_get(struct efhw_nic *nic, uint32_t *tcp_full,
+			     uint32_t *tcp_wild,
+			     uint32_t *udp_full, uint32_t *udp_wild);
+
+#endif /* __CI_EFHW_PUBLIC_H__ */
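
A hedged sketch of how these entry points might be combined (hypothetical example_ name; the increment of 8 is an arbitrary illustrative value): read the current RX filter search depths, widen the wildcard-TCP depth, and write the limits back.

static void example_widen_tcp_wild(struct efhw_nic* nic)
{
  struct efhw_filter_search_limits lim;

  if( !efhw_nic_have_hw(nic) )
    return;  /* functional units not configured yet */

  falcon_nic_get_rx_filter_search_limits(nic, &lim, 0 /* actual depths */);
  lim.tcp_wild += 8;
  falcon_nic_set_rx_filter_search_limits(nic, &lim, 0);
}
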
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/efhw_types.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/efhw_types.h
@@ -0,0 +1,382 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides struct efhw_nic and some related types.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_EFAB_TYPES_H__
+#define __CI_EFHW_EFAB_TYPES_H__
+
+#include <ci/efhw/efhw_config.h>
+#include <ci/efhw/sysdep.h>
+#include <ci/efhw/iopage_types.h>
+#include <ci/efhw/common.h>
+
+/*--------------------------------------------------------------------
+ *
+ * forward type declarations
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_nic;
+
+/*--------------------------------------------------------------------
+ *
+ * Managed interface
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_buffer_table_allocation{
+	unsigned base;
+	unsigned order;
+};
+
+struct eventq_resource_hardware {
+	/*!iobuffer allocated for eventq - can be larger than eventq */
+	struct efhw_iopages iobuff;
+	unsigned iobuff_off;
+	struct efhw_buffer_table_allocation buf_tbl_alloc;
+	int capacity;		/*!< capacity of event queue */
+};
+
+/*--------------------------------------------------------------------
+ *
+ * event queues and event driven callbacks
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_keventq {
+	int lock;
+	caddr_t evq_base;
+	int32_t evq_ptr;
+	uint32_t evq_mask;
+	unsigned instance;
+	struct eventq_resource_hardware hw;
+	struct efhw_ev_handler *ev_handlers;
+};
+
+/*--------------------------------------------------------------------
+ *
+ * filters
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_filter_spec {
+	uint dmaq_id;
+	uint32_t saddr_le32;
+	uint32_t daddr_le32;
+	uint16_t sport_le16;
+	uint16_t dport_le16;
+	unsigned tcp     : 1;
+	unsigned full    : 1;
+	unsigned rss     : 1;	/* not supported on A1 */
+	unsigned scatter : 1;	/* not supported on A1 */
+};
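
A hedged sketch of filling in a full-match TCP RX filter specification with this structure (hypothetical example_ name; the addresses and ports are assumed to already be in the little-endian forms the fields expect):

static void example_fill_spec(struct efhw_filter_spec* spec, uint dmaq,
                              uint32_t saddr_le32, uint16_t sport_le16,
                              uint32_t daddr_le32, uint16_t dport_le16)
{
  spec->dmaq_id = dmaq;
  spec->saddr_le32 = saddr_le32;
  spec->sport_le16 = sport_le16;
  spec->daddr_le32 = daddr_le32;
  spec->dport_le16 = dport_le16;
  spec->tcp = 1;       /* TCP rather than UDP */
  spec->full = 1;      /* match all four address/port fields */
  spec->rss = 0;       /* not supported on A1 silicon */
  spec->scatter = 0;   /* not supported on A1 silicon */
}
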
+struct efhw_filter_depth {
+	unsigned needed;
+	unsigned max;
+};
+
+struct efhw_filter_search_limits {
+	unsigned tcp_full;
+	unsigned tcp_wild;
+	unsigned udp_full;
+	unsigned udp_wild;
+};
+
+
+/**********************************************************************
+ * Portable HW interface. ***************************************
+ **********************************************************************/
+
+/*--------------------------------------------------------------------
+ *
+ * EtherFabric Functional units - configuration and control
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_func_ops {
+
+	/*-------------- Initialisation ------------ */
+
+	/*! close down all hardware functional units - leaves NIC in a safe
+	   state for driver unload */
+	void (*close_hardware) (struct efhw_nic *nic);
+
+	/*! initialise all hardware functional units */
+	int (*init_hardware) (struct efhw_nic *nic,
+			      struct efhw_ev_handler *,
+			      const uint8_t *mac_addr, int non_irq_evq);
+
+	/*-------------- Interrupt support  ------------ */
+
+	/*! Main interrupt routine
+	 **        This function returns:
+	 **  - zero,       if the IRQ was not generated by EF1
+	 **  - non-zero,   if EF1 was the source of the IRQ
+	 **
+	 **
+	 ** opaque is an OS provided pointer for use by the OS callbacks
+	 ** (e.g. in Windows it is used to indicate that a DPC has been
+	 ** scheduled)
+	 */
+	int (*interrupt) (struct efhw_nic *nic);
+
+	/*! Enable the interrupt */
+	void (*interrupt_enable) (struct efhw_nic *nic);
+
+	/*! Disable the interrupt */
+	void (*interrupt_disable) (struct efhw_nic *nic);
+
+	/*! Set interrupt moderation strategy for the given IRQ unit
+	 ** val is in usec
+	 */
+	void (*set_interrupt_moderation)(struct efhw_nic *nic, int evq,
+					 uint val);
+
+	/*-------------- Event support  ------------ */
+
+	/*! Enable the given event queue.
+	   Depending on the underlying implementation (EF1 or Falcon),
+	   either a q_base_addr in host memory or a buffer base id should
+	   be provided.
+	 */
+	void (*event_queue_enable) (struct efhw_nic *nic,
+				    uint evq,	/* event queue index */
+				    uint evq_size,	/* units of #entries */
+				    dma_addr_t q_base_addr, uint buf_base_id,
+				    int interrupting);
+
+	/*! Disable the given event queue (and any associated timer) */
+	void (*event_queue_disable) (struct efhw_nic *nic, uint evq,
+				     int timer_only);
+
+	/*! request wakeup from the NIC on a given event Q */
+	void (*wakeup_request) (struct efhw_nic *nic, dma_addr_t q_base_addr,
+				int next_i, int evq);
+
+	/*! Push a SW event on a given eventQ */
+	void (*sw_event) (struct efhw_nic *nic, int data, int evq);
+
+	/*-------------- IP Filter API  ------------ */
+
+	/*! Setup a given filter - The software can request a filter_i,
+	 * but some EtherFabric implementations will override with
+	 * a more suitable index
+	 */
+	int (*ipfilter_set) (struct efhw_nic *nic, int type,
+			     int *filter_i, int dmaq,
+			     unsigned saddr_be32, unsigned sport_be16,
+			     unsigned daddr_be32, unsigned dport_be16);
+
+	/*! Clear down a given filter */
+	void (*ipfilter_clear) (struct efhw_nic *nic, int filter_idx);
+
+	/*-------------- DMA support  ------------ */
+
+	/*! Initialise NIC state for a given TX DMAQ */
+	void (*dmaq_tx_q_init) (struct efhw_nic *nic,
+				uint dmaq, uint evq, uint owner, uint tag,
+				uint dmaq_size, uint buf_idx, uint flags);
+
+	/*! Initialise NIC state for a given RX DMAQ */
+	void (*dmaq_rx_q_init) (struct efhw_nic *nic,
+				uint dmaq, uint evq, uint owner, uint tag,
+				uint dmaq_size, uint buf_idx, uint flags);
+
+	/*! Disable a given TX DMAQ */
+	void (*dmaq_tx_q_disable) (struct efhw_nic *nic, uint dmaq);
+
+	/*! Disable a given RX DMAQ */
+	void (*dmaq_rx_q_disable) (struct efhw_nic *nic, uint dmaq);
+
+	/*! Flush a given TX DMA channel */
+	int (*flush_tx_dma_channel) (struct efhw_nic *nic, uint dmaq);
+
+	/*! Flush a given RX DMA channel */
+	int (*flush_rx_dma_channel) (struct efhw_nic *nic, uint dmaq);
+
+	/*-------------- Buffer table Support ------------ */
+
+	/*! Initialise a buffer table page */
+	void (*buffer_table_set) (struct efhw_nic *nic,
+				  dma_addr_t dma_addr,
+				  uint bufsz, uint region,
+				  int own_id, int buffer_id);
+
+	/*! Initialise a block of buffer table pages */
+	void (*buffer_table_set_n) (struct efhw_nic *nic, int buffer_id,
+				    dma_addr_t dma_addr,
+				    uint bufsz, uint region,
+				    int n_pages, int own_id);
+
+	/*! Clear a block of buffer table pages */
+	void (*buffer_table_clear) (struct efhw_nic *nic, int buffer_id,
+				    int num);
+
+	/*! Commit a buffer table update */
+	void (*buffer_table_commit) (struct efhw_nic *nic);
+
+	/*-------------- New filter API ------------ */
+
+	/*! Set a given filter */
+	int (*filter_set) (struct efhw_nic *nic, struct efhw_filter_spec *spec,
+			   int *filter_idx_out);
+
+	/*! Clear a given filter */
+	void (*filter_clear) (struct efhw_nic *nic, int filter_idx);
+};
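
A hedged sketch of dispatching through this ops table (hypothetical example_ name; assumes the caller already holds whatever locking the resource driver requires):

static int example_install_filter(struct efhw_nic* nic,
                                  const struct efhw_func_ops* ops,
                                  struct efhw_filter_spec* spec)
{
  int filter_idx;
  int rc = ops->filter_set(nic, spec, &filter_idx);
  if( rc < 0 )
    return rc;                        /* e.g. no free filter-table entry */
  /* ... traffic matching [spec] is now steered to spec->dmaq_id ... */
  ops->filter_clear(nic, filter_idx);
  return 0;
}
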
+
+
+/*----------------------------------------------------------------------------
+ *
+ * NIC type
+ *
+ *---------------------------------------------------------------------------*/
+
+struct efhw_device_type {
+	int  arch;            /* enum efhw_arch */
+	char variant;         /* 'A', 'B', ... */
+	int  revision;        /* 0, 1, ... */
+};
+
+
+/*----------------------------------------------------------------------------
+ *
+ * EtherFabric NIC instance - nic.c for HW independent functions
+ *
+ *---------------------------------------------------------------------------*/
+
+/*! */
+struct efhw_nic {
+	/*! zero-based index in efrm_nic_tablep->nic array */
+	int index;
+	int ifindex;		/*!< OS level nic index */
+	struct net *nd_net;
+
+	struct efhw_device_type devtype;
+
+	/*! Options that can be set by user. */
+	unsigned options;
+# define NIC_OPT_EFTEST             0x1	/* owner is an eftest app */
+
+# define NIC_OPT_DEFAULT            0
+
+	/*! Internal flags that indicate hardware properties at runtime. */
+	unsigned flags;
+# define NIC_FLAG_NO_INTERRUPT      0x01 /* to be set at init time only */
+# define NIC_FLAG_TRY_MSI           0x02
+# define NIC_FLAG_MSI               0x04
+# define NIC_FLAG_OS_IRQ_EN         0x08
+
+	unsigned mtu;		/*!< MAC MTU (includes MAC hdr) */
+
+	/* hardware resources */
+
+	/*! I/O address of the start of the bar */
+	volatile char __iomem *bar_ioaddr;
+
+	/*! Bar number of control aperture. */
+	unsigned ctr_ap_bar;
+	/*! Length of control aperture in bytes. */
+	unsigned ctr_ap_bytes;
+
+	uint8_t mac_addr[ETH_ALEN];	/*!< mac address  */
+
+	/*! EtherFabric Functional Units -- functions */
+	const struct efhw_func_ops *efhw_func;
+
+	/*! This lock protects a number of misc NIC resources.  It should
+	 * only be used for things that can be at the bottom of the lock
+	 * order, i.e. you mustn't attempt to grab any other lock while
+	 * holding this one.
+	 */
+	spinlock_t *reg_lock;
+	spinlock_t the_reg_lock;
+
+	int buf_commit_outstanding;	/*!< outstanding buffer commits */
+
+	/*! interrupt callbacks (hard-irq) */
+	void (*irq_handler) (struct efhw_nic *, int unit);
+
+	/*! event queues per driver */
+	struct efhw_keventq interrupting_evq;
+
+/* for marking when we are not using an IRQ unit
+   - 0 is a valid offset to an IRQ unit on EF1! */
+#define EFHW_IRQ_UNIT_UNUSED  0xffff
+	/*! interrupt unit in use for the interrupting event queue */
+	unsigned int irq_unit;
+
+	struct efhw_keventq non_interrupting_evq;
+
+	struct efhw_iopage irq_iobuff;	/*!<  Falcon SYSERR interrupt */
+
+	/* The new driverlink infrastructure. */
+	struct efx_dl_device *net_driver_dev;
+	struct efx_dlfilt_cb_s *dlfilter_cb;
+
+	/*! Bit masks of the sizes of event queues and dma queues supported
+	 * by the nic. */
+	unsigned evq_sizes;
+	unsigned rxq_sizes;
+	unsigned txq_sizes;
+
+	/* Size of filter table. */
+	unsigned ip_filter_tbl_size;
+
+	/* Number of filters currently used */
+	unsigned ip_filter_tbl_used;
+
+	/* Dynamically allocated filter state. */
+	uint8_t *filter_in_use;
+	struct efhw_filter_spec *filter_spec_cache;
+
+	/* Currently required and maximum filter table search depths. */
+	struct efhw_filter_depth tcp_full_srch;
+	struct efhw_filter_depth tcp_wild_srch;
+	struct efhw_filter_depth udp_full_srch;
+	struct efhw_filter_depth udp_wild_srch;
+
+	/* Number of event queues, DMA queues and timers. */
+	unsigned num_evqs;
+	unsigned num_dmaqs;
+	unsigned num_timers;
+};
+
+
+#define EFHW_KVA(nic)       ((nic)->bar_ioaddr)
+
+
+#endif /* __CI_EFHW_EFAB_TYPES_H__ */
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/hardware_sysdep.h
@@ -0,0 +1,69 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides a version-independent Linux kernel API for header files
+ * with hardware-related definitions (in ci/driver/efab/hardware*).
+ * Only kernels >=2.6.9 are supported.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_HARDWARE_LINUX_H__
+#define __CI_EFHW_HARDWARE_LINUX_H__
+
+#include <asm/byteorder.h>
+
+#if defined(__LITTLE_ENDIAN)
+#define EFHW_IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN)
+#define EFHW_IS_BIG_ENDIAN
+#else
+#error Unknown endianness
+#endif
+
+#ifndef readq
+static inline uint64_t __readq(volatile void __iomem *addr)
+{
+	return *(volatile uint64_t *)addr;
+}
+#define readq(x) __readq(x)
+#endif
+
+#ifndef writeq
+static inline void __writeq(uint64_t v, volatile void __iomem *addr)
+{
+	*(volatile uint64_t *)addr = v;
+}
+#define writeq(val, addr) __writeq((val), (addr))
+#endif
+
+#endif /* __CI_EFHW_HARDWARE_LINUX_H__ */
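
A hedged sketch using the readq()/writeq() fallbacks above (hypothetical example_ name; note the fallbacks are plain 64-bit loads/stores, so they only make sense where the platform can issue such accesses to MMIO space):

static uint64_t example_update_reg64(volatile void __iomem* reg)
{
  uint64_t v = readq(reg);   /* read the current 64-bit register value */
  writeq(v | 1, reg);        /* write it back with bit 0 set */
  return v;
}
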
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/efhw_config.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/efhw_config.h
@@ -0,0 +1,43 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides some limits used in both kernel and userland code.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_EFAB_CONFIG_H__
+#define __CI_EFHW_EFAB_CONFIG_H__
+
+#define EFHW_MAX_NR_DEVS 5	/* max number of efhw devices supported */
+
+#endif /* __CI_EFHW_EFAB_CONFIG_H__ */
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/iopage_types.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/iopage_types.h
@@ -0,0 +1,190 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file provides struct efhw_page and struct efhw_iopage for the Linux
+ * kernel.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *          Alexandra Kossovsky
+ *          OKTET Labs Ltd, Russia,
+ *          http://oktetlabs.ru,
+ *          by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_IOPAGE_LINUX_H__
+#define __CI_EFHW_IOPAGE_LINUX_H__
+
+#include <linux/gfp.h>
+#include <linux/hardirq.h>
+#include <ci/efhw/debug.h>
+#include <ci/efhw/common.h>
+
+/*--------------------------------------------------------------------
+ *
+ * struct efhw_page: A single page of memory.  Directly mapped in the
+ * driver, and can be mapped to userlevel.
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_page {
+	unsigned long kva;
+};
+
+static inline int efhw_page_alloc(struct efhw_page *p)
+{
+	p->kva = __get_free_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL);
+	return p->kva ? 0 : -ENOMEM;
+}
+
+static inline int efhw_page_alloc_zeroed(struct efhw_page *p)
+{
+	p->kva = get_zeroed_page(in_interrupt()? GFP_ATOMIC : GFP_KERNEL);
+	return p->kva ? 0 : -ENOMEM;
+}
+
+static inline void efhw_page_free(struct efhw_page *p)
+{
+	free_page(p->kva);
+	EFHW_DO_DEBUG(memset(p, 0, sizeof(*p)));
+}
+
+static inline char *efhw_page_ptr(struct efhw_page *p)
+{
+	return (char *)p->kva;
+}
+
+static inline unsigned efhw_page_pfn(struct efhw_page *p)
+{
+	return (unsigned)(__pa(p->kva) >> PAGE_SHIFT);
+}
+
+static inline void efhw_page_mark_invalid(struct efhw_page *p)
+{
+	p->kva = 0;
+}
+
+static inline int efhw_page_is_valid(struct efhw_page *p)
+{
+	return p->kva != 0;
+}
+
+static inline void efhw_page_init_from_va(struct efhw_page *p, void *va)
+{
+	p->kva = (unsigned long)va;
+}
+
+/*--------------------------------------------------------------------
+ *
+ * struct efhw_iopage: A single page of memory.  Directly mapped in the
+ * driver, and can be mapped to userlevel.  Can also be accessed by the
+ * NIC.
+ *
+ *--------------------------------------------------------------------*/
+
+struct efhw_iopage {
+	struct efhw_page p;
+	dma_addr_t dma_addr;
+};
+
+static inline dma_addr_t efhw_iopage_dma_addr(struct efhw_iopage *p)
+{
+	return p->dma_addr;
+}
+
+#define efhw_iopage_ptr(iop)		efhw_page_ptr(&(iop)->p)
+#define efhw_iopage_pfn(iop)		efhw_page_pfn(&(iop)->p)
+#define efhw_iopage_mark_invalid(iop)	efhw_page_mark_invalid(&(iop)->p)
+#define efhw_iopage_is_valid(iop)	efhw_page_is_valid(&(iop)->p)
+
+/*--------------------------------------------------------------------
+ *
+ * struct efhw_iopages: A set of pages that are contiguous in physical
+ * memory.  Directly mapped in the driver, and can be mapped to userlevel.
+ * Can also be accessed by the NIC.
+ *
+ * NB.
The O/S may be unwilling to allocate many, or even any of these. So + * only use this type where the NIC really needs a physically contiguous + * buffer. + * + *--------------------------------------------------------------------*/ + +struct efhw_iopages { + caddr_t kva; + unsigned order; + dma_addr_t dma_addr; +}; + +static inline caddr_t efhw_iopages_ptr(struct efhw_iopages *p) +{ + return p->kva; +} + +static inline unsigned efhw_iopages_pfn(struct efhw_iopages *p) +{ + return (unsigned)(__pa(p->kva) >> PAGE_SHIFT); +} + +static inline dma_addr_t efhw_iopages_dma_addr(struct efhw_iopages *p) +{ + return p->dma_addr; +} + +static inline unsigned efhw_iopages_size(struct efhw_iopages *p) +{ + return 1u << (p->order + PAGE_SHIFT); +} + +/* struct efhw_iopage <-> struct efhw_iopages conversions for handling + * physically contiguous allocations in iobufsets for iSCSI. This allows + * the essential information about contiguous allocations from + * efhw_iopages_alloc() to be saved away in the struct efhw_iopage array in + * an iobufset. (Changing the iobufset resource to use a union type would + * involve a lot of code changes, and make the iobufset's metadata larger + * which could be bad as it's supposed to fit into a single page on some + * platforms.) + */ +static inline void +efhw_iopage_init_from_iopages(struct efhw_iopage *iopage, + struct efhw_iopages *iopages, unsigned pageno) +{ + iopage->p.kva = ((unsigned long)efhw_iopages_ptr(iopages)) + + (pageno * PAGE_SIZE); + iopage->dma_addr = efhw_iopages_dma_addr(iopages) + + (pageno * PAGE_SIZE); +} + +static inline void +efhw_iopages_init_from_iopage(struct efhw_iopages *iopages, + struct efhw_iopage *iopage, unsigned order) +{ + iopages->kva = (caddr_t) efhw_iopage_ptr(iopage); + EFHW_ASSERT(iopages->kva); + iopages->order = order; + iopages->dma_addr = efhw_iopage_dma_addr(iopage); +} + +#endif /* __CI_EFHW_IOPAGE_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/common_sysdep.h @@ -0,0 +1,61 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides version-independent Linux kernel API for + * userland-to-kernel interfaces. + * Only kernels >=2.6.9 are supported. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_COMMON_LINUX_H__ +#define __CI_EFHW_COMMON_LINUX_H__ + +#include + +/* Dirty hack, but Linux kernel does not provide DMA_ADDR_T_FMT */ +#if BITS_PER_LONG == 64 || defined(CONFIG_HIGHMEM64G) +#define DMA_ADDR_T_FMT "%llx" +#else +#define DMA_ADDR_T_FMT "%x" +#endif + +/* Linux kernel also does not provide PRIx32... Sigh. */ +#define PRIx32 "x" + +#ifdef __ia64__ +# define PRIx64 "lx" +#else +# define PRIx64 "llx" +#endif + +#endif /* __CI_EFHW_COMMON_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/sysdep.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/sysdep.h @@ -0,0 +1,55 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides version-independent Linux kernel API for efhw library. + * Only kernels >=2.6.9 are supported. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_SYSDEP_LINUX_H__ +#define __CI_EFHW_SYSDEP_LINUX_H__ + +#include +#include +#include +#include +#include + +#include /* necessary for etherdevice.h on some kernels */ +#include + +typedef unsigned long irq_flags_t; + +#define spin_lock_destroy(l_) do {} while (0) + +#endif /* __CI_EFHW_SYSDEP_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/debug.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/debug.h @@ -0,0 +1,84 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides debug-related API for efhw library using Linux kernel + * primitives. 
+ * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFHW_DEBUG_LINUX_H__ +#define __CI_EFHW_DEBUG_LINUX_H__ + +#define EFHW_PRINTK_PREFIX "[sfc efhw] " + +#define EFHW_PRINTK(level, fmt, ...) \ + printk(level EFHW_PRINTK_PREFIX fmt "\n", __VA_ARGS__) + +/* Following macros should be used with non-zero format parameters + * due to __VA_ARGS__ limitations. Use "%s" with __FUNCTION__ if you can't + * find better parameters. */ +#define EFHW_ERR(fmt, ...) EFHW_PRINTK(KERN_ERR, fmt, __VA_ARGS__) +#define EFHW_WARN(fmt, ...) EFHW_PRINTK(KERN_WARNING, fmt, __VA_ARGS__) +#define EFHW_NOTICE(fmt, ...) EFHW_PRINTK(KERN_NOTICE, fmt, __VA_ARGS__) +#if 0 && !defined(NDEBUG) +#define EFHW_TRACE(fmt, ...) EFHW_PRINTK(KERN_DEBUG, fmt, __VA_ARGS__) +#else +#define EFHW_TRACE(fmt, ...) +#endif + +#ifndef NDEBUG +#define EFHW_ASSERT(cond) BUG_ON((cond) == 0) +#define EFHW_DO_DEBUG(expr) expr +#else +#define EFHW_ASSERT(cond) +#define EFHW_DO_DEBUG(expr) +#endif + +#define EFHW_TEST(expr) \ + do { \ + if (unlikely(!(expr))) \ + BUG(); \ + } while (0) + +/* Build time asserts. We paste the line number into the type name + * so that the macro can be used more than once per file even if the + * compiler objects to multiple identical typedefs. Collisions + * between use in different header files is still possible. */ +#ifndef EFHW_BUILD_ASSERT +#define __EFHW_BUILD_ASSERT_NAME(_x) __EFHW_BUILD_ASSERT_ILOATHECPP(_x) +#define __EFHW_BUILD_ASSERT_ILOATHECPP(_x) __EFHW_BUILD_ASSERT__ ##_x +#define EFHW_BUILD_ASSERT(e) \ + typedef char __EFHW_BUILD_ASSERT_NAME(__LINE__)[(e) ? 1 : -1] +#endif + +#endif /* __CI_EFHW_DEBUG_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efhw/common.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efhw/common.h @@ -0,0 +1,97 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides API of the efhw library which may be used both from + * the kernel and from the user-space code. 
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ * Certain parts of the driver were implemented by
+ *         Alexandra Kossovsky
+ *         OKTET Labs Ltd, Russia,
+ *         http://oktetlabs.ru,
+ *         by request of Solarflare Communications
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_EFHW_COMMON_H__
+#define __CI_EFHW_COMMON_H__
+
+#include
+
+enum efhw_arch {
+	EFHW_ARCH_FALCON,
+};
+
+typedef uint32_t efhw_buffer_addr_t;
+#define EFHW_BUFFER_ADDR_FMT	"[ba:%"PRIx32"]"
+
+/*! Comment? */
+typedef union {
+	uint64_t u64;
+	struct {
+		uint32_t a;
+		uint32_t b;
+	} opaque;
+} efhw_event_t;
+
+/* Flags for TX/RX queues */
+#define EFHW_VI_JUMBO_EN           0x01   /*! scatter RX over multiple desc */
+#define EFHW_VI_ISCSI_RX_HDIG_EN   0x02   /*! iscsi rx header digest */
+#define EFHW_VI_ISCSI_TX_HDIG_EN   0x04   /*! iscsi tx header digest */
+#define EFHW_VI_ISCSI_RX_DDIG_EN   0x08   /*! iscsi rx data digest */
+#define EFHW_VI_ISCSI_TX_DDIG_EN   0x10   /*! iscsi tx data digest */
+#define EFHW_VI_TX_PHYS_ADDR_EN    0x20   /*! TX physical address mode */
+#define EFHW_VI_RX_PHYS_ADDR_EN    0x40   /*! RX physical address mode */
+#define EFHW_VI_RM_WITH_INTERRUPT  0x80   /*! VI with an interrupt */
+#define EFHW_VI_TX_IP_CSUM_DIS     0x100  /*! disable ip checksum generation */
+#define EFHW_VI_TX_TCPUDP_CSUM_DIS 0x200  /*! disable tcp/udp checksum
+					      generation */
+#define EFHW_VI_TX_TCPUDP_ONLY     0x400  /*!
drop non-tcp/udp packets */ + +/* Types of hardware filter */ +/* Each of these values implicitly selects scatter filters on B0 - or in + EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK if a non-scatter filter is required */ +#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD (0) /* dest host only */ +#define EFHW_IP_FILTER_TYPE_UDP_FULL (1) /* dest host and port */ +#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD (2) /* dest based filter */ +#define EFHW_IP_FILTER_TYPE_TCP_FULL (3) /* src filter */ +/* Same again, but with RSS (for B0 only) */ +#define EFHW_IP_FILTER_TYPE_UDP_WILDCARD_RSS_B0 (4) +#define EFHW_IP_FILTER_TYPE_UDP_FULL_RSS_B0 (5) +#define EFHW_IP_FILTER_TYPE_TCP_WILDCARD_RSS_B0 (6) +#define EFHW_IP_FILTER_TYPE_TCP_FULL_RSS_B0 (7) + +#define EFHW_IP_FILTER_TYPE_FULL_MASK (0x1) /* Mask for full / wildcard */ +#define EFHW_IP_FILTER_TYPE_TCP_MASK (0x2) /* Mask for TCP type */ +#define EFHW_IP_FILTER_TYPE_RSS_B0_MASK (0x4) /* Mask for B0 RSS enable */ +#define EFHW_IP_FILTER_TYPE_NOSCAT_B0_MASK (0x8) /* Mask for B0 SCATTER dsbl */ + +#define EFHW_IP_FILTER_TYPE_MASK (0xffff) /* Mask of types above */ + +#define EFHW_IP_FILTER_BROADCAST (0x10000) /* driverlink filter + support */ + +#endif /* __CI_EFHW_COMMON_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efrm/sysdep_linux.h @@ -0,0 +1,93 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides version-independent Linux kernel API for efrm library. + * Only kernels >=2.6.9 are supported. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Kfifo API is partially stolen from linux-2.6.22/include/linux/list.h + * Copyright (C) 2004 Stelian Pop + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFRM_SYSDEP_LINUX_H__ +#define __CI_EFRM_SYSDEP_LINUX_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/******************************************************************** + * + * List API + * + ********************************************************************/ + +static inline struct list_head *list_pop(struct list_head *list) +{ + struct list_head *link = list->next; + list_del(link); + return link; +} + +static inline struct list_head *list_pop_tail(struct list_head *list) +{ + struct list_head *link = list->prev; + list_del(link); + return link; +} + +/******************************************************************** + * + * Kfifo API + * + ********************************************************************/ + +static inline void kfifo_vfree(struct kfifo *fifo) +{ + vfree(fifo->buffer); + kfree(fifo); +} + +#endif /* __CI_EFRM_SYSDEP_LINUX_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efrm/sysdep.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efrm/sysdep.h @@ -0,0 +1,46 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides Linux-like system-independent API for efrm library. + * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFRM_SYSDEP_H__ +#define __CI_EFRM_SYSDEP_H__ + +/* Spinlocks are defined in efhw/sysdep.h */ +#include + +#include + +#endif /* __CI_EFRM_SYSDEP_H__ */ --- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/efrm/nic_table.h +++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/efrm/nic_table.h @@ -0,0 +1,98 @@ +/**************************************************************************** + * Driver for Solarflare network controllers - + * resource management for Xen backend, OpenOnload, etc + * (including support for SFE4001 10GBT NIC) + * + * This file provides public API for NIC table. 
+ * + * Copyright 2005-2007: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Developed and maintained by Solarflare Communications: + * + * + * + * Certain parts of the driver were implemented by + * Alexandra Kossovsky + * OKTET Labs Ltd, Russia, + * http://oktetlabs.ru, + * by request of Solarflare Communications + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef __CI_EFRM_NIC_TABLE_H__ +#define __CI_EFRM_NIC_TABLE_H__ + +#include +#include + +/*-------------------------------------------------------------------- + * + * struct efrm_nic_table - top level driver object keeping all NICs - + * implemented in driver_object.c + * + *--------------------------------------------------------------------*/ + +/*! Comment? */ +struct efrm_nic_table { + /*! nics attached to this driver */ + struct efhw_nic *nic[EFHW_MAX_NR_DEVS]; + /*! pointer to an arbitrary struct efhw_nic if one exists; + * for code which does not care which NIC it wants but + * still needs one. Note you cannot assume nic[0] exists. */ + struct efhw_nic *a_nic; + uint32_t nic_count; /*!< number of nics attached to this driver */ + spinlock_t lock; /*!< lock for table modifications */ + atomic_t ref_count; /*!< refcount for users of nic table */ +}; + +/* Resource driver structures used by other drivers as well */ +extern struct efrm_nic_table *efrm_nic_tablep; + +static inline void efrm_nic_table_hold(void) +{ + atomic_inc(&efrm_nic_tablep->ref_count); +} + +static inline void efrm_nic_table_rele(void) +{ + atomic_dec(&efrm_nic_tablep->ref_count); +} + +static inline int efrm_nic_table_held(void) +{ + return atomic_read(&efrm_nic_tablep->ref_count) != 0; +} + +/* Run code block _x multiple times with variable nic set to each + * registered NIC in turn. + * DO NOT "break" out of this loop early. 
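+ * A premature exit would skip the efrm_nic_table_rele() hidden in the
+ * loop condition and leak a reference on the NIC table.
+ *
+ * Purely illustrative sketch of intended use; efhw_nic_shutdown() is a
+ * hypothetical callee, not something this header declares:
+ *
+ *	int nic_i;
+ *	struct efhw_nic *nic;
+ *
+ *	EFRM_FOR_EACH_NIC(nic_i, nic)
+ *		efhw_nic_shutdown(nic);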
+ */
+#define EFRM_FOR_EACH_NIC(_nic_i, _nic)				\
+	for ((_nic_i) = (efrm_nic_table_hold(), 0);		\
+	     (_nic_i) < EFHW_MAX_NR_DEVS ||			\
+	     (efrm_nic_table_rele(), 0);			\
+	     (_nic_i)++)					\
+		if (((_nic) = efrm_nic_tablep->nic[_nic_i]))
+
+#define EFRM_FOR_EACH_NIC_IN_SET(_set, _i, _nic)		\
+	for ((_i) = (efrm_nic_table_hold(), 0);			\
+	     (_i) < EFHW_MAX_NR_DEVS ||				\
+	     (efrm_nic_table_rele(), 0);			\
+	     ++(_i))						\
+		if (((_nic) = efrm_nic_tablep->nic[_i]) &&	\
+		    efrm_nic_set_read((_set), (_i)))
+
+#endif /* __CI_EFRM_NIC_TABLE_H__ */
--- linux-ec2-2.6.31.orig/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h
+++ linux-ec2-2.6.31/drivers/xen/sfc_netback/ci/driver/resource/efx_vi.h
@@ -0,0 +1,273 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers -
+ *          resource management for Xen backend, OpenOnload, etc
+ *          (including support for SFE4001 10GBT NIC)
+ *
+ * This file contains public EFX VI API to Solarflare resource manager.
+ *
+ * Copyright 2005-2007: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Developed and maintained by Solarflare Communications:
+ *
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ ****************************************************************************
+ */
+
+#ifndef __CI_DRIVER_RESOURCE_EFX_VI_H__
+#define __CI_DRIVER_RESOURCE_EFX_VI_H__
+
+/* Default size of event queue in the efx_vi resource.  Copied from
+ * CI_CFG_NETIF_EVENTQ_SIZE */
+#define EFX_VI_EVENTQ_SIZE_DEFAULT 1024
+
+extern int efx_vi_eventq_size;
+
+/**************************************************************************
+ * efx_vi_state types, allocation and free
+ **************************************************************************/
+
+/*! Handle for referring to an efx_vi */
+struct efx_vi_state;
+
+/*!
+ * Allocate an efx_vi, including event queue and pt_endpoint
+ *
+ * \param vih_out Pointer to a handle that is set on success
+ * \param ifindex Index of the network interface desired
+ * \return Zero on success (and vih_out set), non-zero on failure.
+ */
+extern int
+efx_vi_alloc(struct efx_vi_state **vih_out, int ifindex);
+
+/*!
+ * Free a previously allocated efx_vi
+ *
+ * \param vih The handle of the efx_vi to free
+ */
+extern void
+efx_vi_free(struct efx_vi_state *vih);
+
+/*!
+ * Reset a previously allocated efx_vi
+ *
+ * \param vih The handle of the efx_vi to reset
+ */
+extern void
+efx_vi_reset(struct efx_vi_state *vih);
+
+/**************************************************************************
+ * efx_vi_eventq types and functions
+ **************************************************************************/
+
+/*!
+ * Register a function to receive callbacks when event queue timeouts
+ * or wakeups occur.  Only one function per efx_vi can be registered
+ * at once.
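+ *
+ * Illustrative usage only; my_evq_callback and my_ctx are hypothetical
+ * names, not part of this API:
+ *
+ *	static void my_evq_callback(void *context, int is_timeout)
+ *	{
+ *		... wake whoever is waiting on this event queue ...
+ *	}
+ *
+ *	rc = efx_vi_eventq_register_callback(vih, my_evq_callback, my_ctx);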
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param callback The function to callback
+ * \param context An argument to pass to the callback function
+ * \return Zero on success, non-zero on failure.
+ */
+extern int
+efx_vi_eventq_register_callback(struct efx_vi_state *vih,
+				void (*callback)(void *context, int is_timeout),
+				void *context);
+
+/*!
+ * Remove the current eventq timeout or wakeup callback function
+ *
+ * \param vih The handle to identify the efx_vi
+ * \return Zero on success, non-zero on failure
+ */
+extern int
+efx_vi_eventq_kill_callback(struct efx_vi_state *vih);
+
+/**************************************************************************
+ * efx_vi_dma_map types and functions
+ **************************************************************************/
+
+/*!
+ * Handle for referring to an efx_vi DMA mapping
+ */
+struct efx_vi_dma_map_state;
+
+/*!
+ * Map a list of buffer pages so they are registered with the hardware
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param pages An array of page pointers to map
+ * \param n_pages Length of the page pointer array.  Must be a power of two.
+ * \param dmh_out Set on success to a handle used to refer to this mapping
+ * \return Zero on success, non-zero on failure.
+ */
+extern int
+efx_vi_dma_map_pages(struct efx_vi_state *vih, struct page **pages,
+		     int n_pages, struct efx_vi_dma_map_state **dmh_out);
+extern int
+efx_vi_dma_map_addrs(struct efx_vi_state *vih,
+		     unsigned long long *dev_bus_addrs, int n_pages,
+		     struct efx_vi_dma_map_state **dmh_out);
+
+/*!
+ * Unmap a previously mapped set of pages so they are no longer registered
+ * with the hardware.
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param dmh The handle to identify the dma mapping
+ */
+extern void
+efx_vi_dma_unmap_pages(struct efx_vi_state *vih,
+		       struct efx_vi_dma_map_state *dmh);
+extern void
+efx_vi_dma_unmap_addrs(struct efx_vi_state *vih,
+		       struct efx_vi_dma_map_state *dmh);
+
+/*!
+ * Retrieve the buffer address of the mapping
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param dmh The handle to identify the buffer mapping
+ * \return The buffer address on success, or zero on failure
+ */
+extern unsigned
+efx_vi_dma_get_map_addr(struct efx_vi_state *vih,
+			struct efx_vi_dma_map_state *dmh);
+
+/**************************************************************************
+ * efx_vi filter functions
+ **************************************************************************/
+
+#define EFX_VI_STATIC_FILTERS 32
+
+/*! Handle to refer to a filter instance */
+struct filter_resource_t;
+
+/*!
+ * Allocate and add a filter
+ *
+ * \param vih The handle to identify the efx_vi
+ * \param protocol The protocol of the new filter: UDP or TCP
+ * \param ip_addr_be32 The local ip address of the filter
+ * \param port_le16 The local port of the filter
+ * \param fh_out Set on success to be a handle to refer to this filter
+ * \return Zero on success, non-zero on failure.
+ */
+extern int
+efx_vi_filter(struct efx_vi_state *vih, int protocol, unsigned ip_addr_be32,
+	      int port_le16, struct filter_resource_t **fh_out);
+
+/*!
+ * Remove a filter and free resources associated with it + * + * \param vih The handle to identify the efx_vi + * \param fh The handle to identify the filter + * \return Zero on success, non-zero on failure + */ +extern int +efx_vi_filter_stop(struct efx_vi_state *vih, struct filter_resource_t *fh); + +/************************************************************************** + * efx_vi hw resources types and functions + **************************************************************************/ + +/*! Constants for the type field in efx_vi_hw_resource */ +#define EFX_VI_HW_RESOURCE_TXDMAQ 0x0 /* PFN of TX DMA Q */ +#define EFX_VI_HW_RESOURCE_RXDMAQ 0x1 /* PFN of RX DMA Q */ +#define EFX_VI_HW_RESOURCE_EVQTIMER 0x4 /* Address of event q timer */ + +/* Address of event q pointer (EF1) */ +#define EFX_VI_HW_RESOURCE_EVQPTR 0x5 +/* Address of register pointer (Falcon A) */ +#define EFX_VI_HW_RESOURCE_EVQRPTR 0x6 +/* Offset of register pointer (Falcon B) */ +#define EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET 0x7 +/* Address of mem KVA */ +#define EFX_VI_HW_RESOURCE_EVQMEMKVA 0x8 +/* PFN of doorbell page (Falcon) */ +#define EFX_VI_HW_RESOURCE_BELLPAGE 0x9 + +/*! How large an array to allocate for the get_() functions - smaller + than the total number of constants as some are mutually exclusive */ +#define EFX_VI_HW_RESOURCE_MAXSIZE 0x7 + +/*! Constants for the mem_type field in efx_vi_hw_resource */ +#define EFX_VI_HW_RESOURCE_IOBUFFER 0 /* Host memory */ +#define EFX_VI_HW_RESOURCE_PERIPHERAL 1 /* Card memory/registers */ + +/*! + * Data structure providing information on a hardware resource mapping + */ +struct efx_vi_hw_resource { + u8 type; /*!< What this resource represents */ + u8 mem_type; /*!< What type of memory is it in, eg, + * host or iomem */ + u8 more_to_follow; /*!< Is this part of a multi-region resource */ + u32 length; /*!< Length of the resource in bytes */ + unsigned long address; /*!< Address of this resource */ +}; + +/*! + * Metadata concerning the list of hardware resource mappings + */ +struct efx_vi_hw_resource_metadata { + int evq_order; + int evq_offs; + int evq_capacity; + int instance; + unsigned rx_capacity; + unsigned tx_capacity; + int nic_arch; + int nic_revision; + char nic_variant; +}; + +/*! + * Obtain a list of hardware resource mappings, using virtual addresses + * + * \param vih The handle to identify the efx_vi + * \param mdata Pointer to a structure to receive the metadata + * \param hw_res_array An array to receive the list of hardware resources + * \param length The length of hw_res_array. Updated on success to contain + * the number of entries in the supplied array that were used. + * \return Zero on success, non-zero on failure + */ +extern int +efx_vi_hw_resource_get_virt(struct efx_vi_state *vih, + struct efx_vi_hw_resource_metadata *mdata, + struct efx_vi_hw_resource *hw_res_array, + int *length); + +/*! + * Obtain a list of hardware resource mappings, using physical addresses + * + * \param vih The handle to identify the efx_vi + * \param mdata Pointer to a structure to receive the metadata + * \param hw_res_array An array to receive the list of hardware resources + * \param length The length of hw_res_array. Updated on success to contain + * the number of entries in the supplied array that were used. 
+ * \return Zero on success, non-zero on failure
+ */
+extern int
+efx_vi_hw_resource_get_phys(struct efx_vi_state *vih,
+			    struct efx_vi_hw_resource_metadata *mdata,
+			    struct efx_vi_hw_resource *hw_res_array,
+			    int *length);
+
+#endif /* __CI_DRIVER_RESOURCE_EFX_VI_H__ */
--- linux-ec2-2.6.31.orig/drivers/xen/privcmd/privcmd.c
+++ linux-ec2-2.6.31/drivers/xen/privcmd/privcmd.c
@@ -0,0 +1,327 @@
+/******************************************************************************
+ * privcmd.c
+ *
+ * Interface to privileged domain-0 commands.
+ *
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static struct proc_dir_entry *privcmd_intf;
+static struct proc_dir_entry *capabilities_intf;
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
+#endif
+
+static long privcmd_ioctl(struct file *file,
+			  unsigned int cmd, unsigned long data)
+{
+	long ret = -ENOSYS;
+	void __user *udata = (void __user *) data;
+
+	switch (cmd) {
+	case IOCTL_PRIVCMD_HYPERCALL: {
+		privcmd_hypercall_t hypercall;
+
+		if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
+			return -EFAULT;
+
+#ifdef CONFIG_X86
+		if (hypercall.op >= (PAGE_SIZE >> 5))
+			break;
+		ret = _hypercall(long, (unsigned int)hypercall.op,
+				 (unsigned long)hypercall.arg[0],
+				 (unsigned long)hypercall.arg[1],
+				 (unsigned long)hypercall.arg[2],
+				 (unsigned long)hypercall.arg[3],
+				 (unsigned long)hypercall.arg[4]);
+#else
+		ret = privcmd_hypercall(&hypercall);
+#endif
+	}
+	break;
+
+	case IOCTL_PRIVCMD_MMAP: {
+#define MMAP_NR_PER_PAGE (int)((PAGE_SIZE-sizeof(struct list_head))/sizeof(privcmd_mmap_entry_t))
+		privcmd_mmap_t mmapcmd;
+		privcmd_mmap_entry_t *msg;
+		privcmd_mmap_entry_t __user *p;
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+		unsigned long va;
+		int i, rc;
+		LIST_HEAD(pagelist);
+		struct list_head *l,*l2;
+
+		if (!is_initial_xendomain())
+			return -EPERM;
+
+		if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
+			return -EFAULT;
+
+		p = mmapcmd.entry;
+		for (i = 0; i < mmapcmd.num;) {
+			int nr = min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
+
+			rc = -ENOMEM;
+			l = (struct list_head *) __get_free_page(GFP_KERNEL);
+			if (l == NULL)
+				goto mmap_out;
+
+			INIT_LIST_HEAD(l);
+			list_add_tail(l, &pagelist);
+			msg = (privcmd_mmap_entry_t*)(l + 1);
+
+			rc = -EFAULT;
+			if (copy_from_user(msg, p, nr*sizeof(*msg)))
+				goto mmap_out;
+			i += nr;
+			p += nr;
+		}
+
+		l = pagelist.next;
+		msg = (privcmd_mmap_entry_t*)(l + 1);
+
+		down_write(&mm->mmap_sem);
+
+		vma = find_vma(mm, msg->va);
+		rc = -EINVAL;
+		if (!vma || (msg->va != vma->vm_start) ||
+		    !privcmd_enforce_singleshot_mapping(vma))
+			goto mmap_out;
+
+		va = vma->vm_start;
+
+		i = 0;
+		list_for_each(l, &pagelist) {
+			int nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
+
+			msg = (privcmd_mmap_entry_t*)(l + 1);
+			while (i < nr) {
+
+				/* Do not allow range to wrap the address space. */
+				rc = -EINVAL;
+				if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
+				    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -va))
+					goto mmap_out;
+
+				/* Range chunks must be contiguous in va space.
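+				 * That is, each entry must begin exactly where
+				 * the previous one ended, and the run must not
+				 * extend past vma->vm_end.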
+				 */
+				if ((msg->va != va) ||
+				    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
+					goto mmap_out;
+
+				if ((rc = direct_remap_pfn_range(
+					vma,
+					msg->va & PAGE_MASK,
+					msg->mfn,
+					msg->npages << PAGE_SHIFT,
+					vma->vm_page_prot,
+					mmapcmd.dom)) < 0)
+					goto mmap_out;
+
+				va += msg->npages << PAGE_SHIFT;
+				msg++;
+				i++;
+			}
+		}
+
+		rc = 0;
+
+	mmap_out:
+		up_write(&mm->mmap_sem);
+		list_for_each_safe(l,l2,&pagelist)
+			free_page((unsigned long)l);
+		ret = rc;
+	}
+#undef MMAP_NR_PER_PAGE
+	break;
+
+	case IOCTL_PRIVCMD_MMAPBATCH: {
+#define MMAPBATCH_NR_PER_PAGE (unsigned long)((PAGE_SIZE-sizeof(struct list_head))/sizeof(unsigned long))
+		privcmd_mmapbatch_t m;
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+		xen_pfn_t __user *p;
+		unsigned long addr, *mfn, nr_pages;
+		int i;
+		LIST_HEAD(pagelist);
+		struct list_head *l, *l2;
+
+		if (!is_initial_xendomain())
+			return -EPERM;
+
+		if (copy_from_user(&m, udata, sizeof(m)))
+			return -EFAULT;
+
+		nr_pages = m.num;
+		if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
+			return -EINVAL;
+
+		p = m.arr;
+		for (i = 0; i < nr_pages; ) {
+			int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+
+			ret = -ENOMEM;
+			l = (struct list_head *)__get_free_page(GFP_KERNEL);
+			if (l == NULL)
+				goto mmapbatch_out;
+
+			INIT_LIST_HEAD(l);
+			list_add_tail(l, &pagelist);
+
+			mfn = (unsigned long *)(l + 1);
+			ret = -EFAULT;
+			if (copy_from_user(mfn, p, nr*sizeof(*mfn)))
+				goto mmapbatch_out;
+
+			i += nr; p += nr;
+		}
+
+		down_write(&mm->mmap_sem);
+
+		vma = find_vma(mm, m.addr);
+		ret = -EINVAL;
+		if (!vma ||
+		    (m.addr != vma->vm_start) ||
+		    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
+		    !privcmd_enforce_singleshot_mapping(vma)) {
+			up_write(&mm->mmap_sem);
+			goto mmapbatch_out;
+		}
+
+		p = m.arr;
+		addr = m.addr;
+		i = 0;
+		ret = 0;
+		list_for_each(l, &pagelist) {
+			int nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+			mfn = (unsigned long *)(l + 1);
+
+			while (i < nr) {
+				if (direct_remap_pfn_range(vma, addr, *mfn,
+					PAGE_SIZE, vma->vm_page_prot, m.dom) < 0) {
+					*mfn |= 0xf0000000U;
+					ret++;
+				}
+				mfn++; i++; addr += PAGE_SIZE;
+			}
+		}
+
+		up_write(&mm->mmap_sem);
+		if (ret > 0) {
+			p = m.arr;
+			i = 0;
+			ret = 0;
+			list_for_each(l, &pagelist) {
+				int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+				mfn = (unsigned long *)(l + 1);
+				if (copy_to_user(p, mfn, nr*sizeof(*mfn)))
+					ret = -EFAULT;
+				i += nr; p += nr;
+			}
+		}
+	mmapbatch_out:
+		list_for_each_safe(l,l2,&pagelist)
+			free_page((unsigned long)l);
+#undef MMAPBATCH_NR_PER_PAGE
+	}
+	break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+	.fault = privcmd_fault
+};
+
+static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	/* Unsupported for auto-translate guests. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -ENOSYS;
+
+	/* DONTCOPY is essential for Xen as copy_page_range is broken.
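+	 * A child created by fork() could not safely inherit these
+	 * foreign mappings, so the VMA must never be copied.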
*/ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY; + vma->vm_ops = &privcmd_vm_ops; + vma->vm_private_data = NULL; + + return 0; +} + +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +{ + return (xchg(&vma->vm_private_data, (void *)1) == NULL); +} +#endif + +static const struct file_operations privcmd_file_ops = { + .unlocked_ioctl = privcmd_ioctl, + .mmap = privcmd_mmap, +}; + +static int capabilities_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = 0; + *page = 0; + + if (is_initial_xendomain()) + len = sprintf( page, "control_d\n" ); + + *eof = 1; + return len; +} + +static int __init privcmd_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + privcmd_intf = create_xen_proc_entry("privcmd", 0400); + if (privcmd_intf != NULL) + privcmd_intf->proc_fops = &privcmd_file_ops; + + capabilities_intf = create_xen_proc_entry("capabilities", 0400 ); + if (capabilities_intf != NULL) + capabilities_intf->read_proc = capabilities_read; + + return 0; +} + +__initcall(privcmd_init); --- linux-ec2-2.6.31.orig/drivers/xen/privcmd/compat_privcmd.c +++ linux-ec2-2.6.31/drivers/xen/privcmd/compat_privcmd.c @@ -0,0 +1,72 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (C) IBM Corp. 
2006 + * + * Authors: Jimi Xenidis + */ + +#include +#include +#include +#include +#include +#include +#include + +int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg) +{ + int ret; + + switch (cmd) { + case IOCTL_PRIVCMD_MMAP_32: { + struct privcmd_mmap *p; + struct privcmd_mmap_32 *p32; + struct privcmd_mmap_32 n32; + + p32 = compat_ptr(arg); + p = compat_alloc_user_space(sizeof(*p)); + if (copy_from_user(&n32, p32, sizeof(n32)) || + put_user(n32.num, &p->num) || + put_user(n32.dom, &p->dom) || + put_user(compat_ptr(n32.entry), &p->entry)) + return -EFAULT; + + ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAP, (unsigned long)p); + } + break; + case IOCTL_PRIVCMD_MMAPBATCH_32: { + struct privcmd_mmapbatch *p; + struct privcmd_mmapbatch_32 *p32; + struct privcmd_mmapbatch_32 n32; + + p32 = compat_ptr(arg); + p = compat_alloc_user_space(sizeof(*p)); + if (copy_from_user(&n32, p32, sizeof(n32)) || + put_user(n32.num, &p->num) || + put_user(n32.dom, &p->dom) || + put_user(n32.addr, &p->addr) || + put_user(compat_ptr(n32.arr), &p->arr)) + return -EFAULT; + + ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, (unsigned long)p); + } + break; + default: + ret = -EINVAL; + break; + } + return ret; +} --- linux-ec2-2.6.31.orig/drivers/xen/privcmd/Makefile +++ linux-ec2-2.6.31/drivers/xen/privcmd/Makefile @@ -0,0 +1,3 @@ + +obj-y += privcmd.o +obj-$(CONFIG_COMPAT) += compat_privcmd.o --- linux-ec2-2.6.31.orig/drivers/xen/core/pci.c +++ linux-ec2-2.6.31/drivers/xen/core/pci.c @@ -0,0 +1,83 @@ +/* + * vim:shiftwidth=8:noexpandtab + */ + +#include +#include +#include +#include +#include "../../pci/pci.h" + +static int (*pci_bus_probe)(struct device *dev); +static int (*pci_bus_remove)(struct device *dev); + +static int pci_bus_probe_wrapper(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + struct physdev_manage_pci_ext manage_pci_ext; + +#ifdef CONFIG_PCI_IOV + if (pci_dev->is_virtfn) { + memset(&manage_pci_ext, 0, sizeof(manage_pci_ext)); + manage_pci_ext.bus = pci_dev->bus->number; + manage_pci_ext.devfn = pci_dev->devfn; + manage_pci_ext.is_virtfn = 1; + manage_pci_ext.physfn.bus = pci_dev->physfn->bus->number; + manage_pci_ext.physfn.devfn = pci_dev->physfn->devfn; + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + memset(&manage_pci_ext, 0, sizeof(manage_pci_ext)); + manage_pci_ext.bus = pci_dev->bus->number; + manage_pci_ext.devfn = pci_dev->devfn; + manage_pci_ext.is_extfn = 1; + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else { + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, + &manage_pci); + } + if (r && r != -ENOSYS) + return r; + + r = pci_bus_probe(dev); + return r; +} + +static int pci_bus_remove_wrapper(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + r = pci_bus_remove(dev); + /* dev and pci_dev are no longer valid!! 
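+	 * (the remove handler may have freed them); this is why bus and
+	 * devfn were cached in manage_pci before the call.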
*/ + + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci)); + return r; +} + +static int __init hook_pci_bus(void) +{ + if (!is_running_on_xen() || !is_initial_xendomain()) + return 0; + + pci_bus_probe = pci_bus_type.probe; + pci_bus_type.probe = pci_bus_probe_wrapper; + + pci_bus_remove = pci_bus_type.remove; + pci_bus_type.remove = pci_bus_remove_wrapper; + + return 0; +} + +core_initcall(hook_pci_bus); --- linux-ec2-2.6.31.orig/drivers/xen/core/cpu_hotplug.c +++ linux-ec2-2.6.31/drivers/xen/core/cpu_hotplug.c @@ -0,0 +1,181 @@ +#include +#include +#include +#include +#include +#include +#include + +/* + * Set of CPUs that remote admin software will allow us to bring online. + * Notified to us via xenbus. + */ +static cpumask_var_t xenbus_allowed_cpumask; + +/* Set of CPUs that local admin will allow us to bring online. */ +static cpumask_var_t local_allowed_cpumask; + +static int local_cpu_hotplug_request(void) +{ + /* + * We assume a CPU hotplug request comes from local admin if it is made + * via a userspace process (i.e., one with a real mm_struct). + */ + return (current->mm != NULL); +} + +static void __cpuinit vcpu_hotplug(unsigned int cpu) +{ + int err; + char dir[32], state[32]; + + if ((cpu >= NR_CPUS) || !cpu_possible(cpu)) + return; + + sprintf(dir, "cpu/%u", cpu); + err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); + if (err != 1) { + printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); + return; + } + + if (strcmp(state, "online") == 0) { + cpumask_set_cpu(cpu, xenbus_allowed_cpumask); + (void)cpu_up(cpu); + } else if (strcmp(state, "offline") == 0) { + cpumask_clear_cpu(cpu, xenbus_allowed_cpumask); + (void)cpu_down(cpu); + } else { + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", + state, cpu); + } +} + +static void __cpuinit handle_vcpu_hotplug_event( + struct xenbus_watch *watch, const char **vec, unsigned int len) +{ + unsigned int cpu; + char *cpustr; + const char *node = vec[XS_WATCH_PATH]; + + if ((cpustr = strstr(node, "cpu/")) != NULL) { + sscanf(cpustr, "cpu/%u", &cpu); + vcpu_hotplug(cpu); + } +} + +static int smpboot_cpu_notify(struct notifier_block *notifier, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + /* + * We do this in a callback notifier rather than __cpu_disable() + * because local_cpu_hotplug_request() does not work in the latter + * as it's always executed from within a stopmachine kthread. 
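+	 * (a kthread runs with current->mm == NULL, so the request would
+	 * never be classified as local there).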
+ */ + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) + cpumask_clear_cpu(cpu, local_allowed_cpumask); + + return NOTIFY_OK; +} + +static int __cpuinit setup_cpu_watcher(struct notifier_block *notifier, + unsigned long event, void *data) +{ + unsigned int i; + + static struct xenbus_watch __cpuinitdata cpu_watch = { + .node = "cpu", + .callback = handle_vcpu_hotplug_event, + .flags = XBWF_new_thread }; + (void)register_xenbus_watch(&cpu_watch); + + if (!is_initial_xendomain()) { + for_each_possible_cpu(i) + vcpu_hotplug(i); + printk(KERN_INFO "Brought up %ld CPUs\n", + (long)num_online_cpus()); + } + + return NOTIFY_DONE; +} + +static int __init setup_vcpu_hotplug_event(void) +{ + static struct notifier_block hotplug_cpu = { + .notifier_call = smpboot_cpu_notify }; + static struct notifier_block __cpuinitdata xsn_cpu = { + .notifier_call = setup_cpu_watcher }; + + if (!is_running_on_xen()) + return -ENODEV; + + register_cpu_notifier(&hotplug_cpu); + register_xenstore_notifier(&xsn_cpu); + + return 0; +} + +arch_initcall(setup_vcpu_hotplug_event); + +int __ref smp_suspend(void) +{ + unsigned int cpu; + int err; + + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + err = cpu_down(cpu); + if (err) { + printk(KERN_CRIT "Failed to take all CPUs " + "down: %d.\n", err); + for_each_possible_cpu(cpu) + vcpu_hotplug(cpu); + return err; + } + } + + return 0; +} + +void __ref smp_resume(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == 0) + continue; + vcpu_hotplug(cpu); + } +} + +int cpu_up_check(unsigned int cpu) +{ + int rc = 0; + + if (local_cpu_hotplug_request()) { + cpumask_set_cpu(cpu, local_allowed_cpumask); + if (!cpumask_test_cpu(cpu, xenbus_allowed_cpumask)) { + printk("%s: attempt to bring up CPU %u disallowed by " + "remote admin.\n", __FUNCTION__, cpu); + rc = -EBUSY; + } + } else if (!cpumask_test_cpu(cpu, local_allowed_cpumask) || + !cpumask_test_cpu(cpu, xenbus_allowed_cpumask)) { + rc = -EBUSY; + } + + return rc; +} + +void __init init_xenbus_allowed_cpumask(void) +{ + if (!alloc_cpumask_var(&xenbus_allowed_cpumask, GFP_KERNEL)) + BUG(); + cpumask_copy(xenbus_allowed_cpumask, cpu_present_mask); + if (!alloc_cpumask_var(&local_allowed_cpumask, GFP_KERNEL)) + BUG(); + cpumask_setall(local_allowed_cpumask); +} --- linux-ec2-2.6.31.orig/drivers/xen/core/machine_kexec.c +++ linux-ec2-2.6.31/drivers/xen/core/machine_kexec.c @@ -0,0 +1,268 @@ +/* + * drivers/xen/core/machine_kexec.c + * handle transition of Linux booting another kernel + */ + +#include +#include +#include +#include +#include + +extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, + struct kimage *image); +extern int machine_kexec_setup_resources(struct resource *hypervisor, + struct resource *phys_cpus, + int nr_phys_cpus); +extern void machine_kexec_register_resources(struct resource *res); + +static int __initdata xen_max_nr_phys_cpus; +static struct resource xen_hypervisor_res; +static struct resource *xen_phys_cpus; + +size_t vmcoreinfo_size_xen; +unsigned long paddr_vmcoreinfo_xen; + +void __init xen_machine_kexec_setup_resources(void) +{ + xen_kexec_range_t range; + struct resource *res; + int k = 0; + int rc; + + if (strstr(boot_command_line, "crashkernel=")) + printk(KERN_WARNING "Ignoring crashkernel command line, " + "parameter will be supplied by xen\n"); + + if (!is_initial_xendomain()) + return; + + /* determine maximum number of physical cpus */ + + while (1) { + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_CPU; + range.nr = k; 
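+		/* The get_range hypercall fails once range.nr passes the
+		 * last physical CPU, which terminates this probe loop. */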
+ + if(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) + break; + + k++; + } + + if (k == 0) + return; + + xen_max_nr_phys_cpus = k; + + /* allocate xen_phys_cpus */ + + xen_phys_cpus = alloc_bootmem(k * sizeof(struct resource)); + BUG_ON(xen_phys_cpus == NULL); + + /* fill in xen_phys_cpus with per-cpu crash note information */ + + for (k = 0; k < xen_max_nr_phys_cpus; k++) { + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_CPU; + range.nr = k; + + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) + goto err; + + res = xen_phys_cpus + k; + + memset(res, 0, sizeof(*res)); + res->name = "Crash note"; + res->start = range.start; + res->end = range.start + range.size - 1; + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + } + + /* fill in xen_hypervisor_res with hypervisor machine address range */ + + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_XEN; + + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) + goto err; + + xen_hypervisor_res.name = "Hypervisor code and data"; + xen_hypervisor_res.start = range.start; + xen_hypervisor_res.end = range.start + range.size - 1; + xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM; +#ifdef CONFIG_X86 + insert_resource(&iomem_resource, &xen_hypervisor_res); +#endif + + /* fill in crashk_res if range is reserved by hypervisor */ + + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_CRASH; + + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) + goto err; + + if (range.size) { + crashk_res.start = range.start; + crashk_res.end = range.start + range.size - 1; +#ifdef CONFIG_X86 + insert_resource(&iomem_resource, &crashk_res); +#endif + } + + /* get physical address of vmcoreinfo */ + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_VMCOREINFO; + + rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range); + + if (rc == 0) { + /* Hypercall succeeded */ + vmcoreinfo_size_xen = range.size; + paddr_vmcoreinfo_xen = range.start; + + } else { + /* Hypercall failed. + * Indicate not to create sysfs file by resetting globals + */ + vmcoreinfo_size_xen = 0; + paddr_vmcoreinfo_xen = 0; + + /* The KEXEC_CMD_kexec_get_range hypercall did not implement + * KEXEC_RANGE_MA_VMCOREINFO until Xen 3.3. + * Do not bail out if it fails for this reason. + */ + if (rc != -EINVAL) + return; + } + + if (machine_kexec_setup_resources(&xen_hypervisor_res, xen_phys_cpus, + xen_max_nr_phys_cpus)) + goto err; + +#ifdef CONFIG_X86 + for (k = 0; k < xen_max_nr_phys_cpus; k++) { + res = xen_phys_cpus + k; + if (!res->parent) /* outside of xen_hypervisor_res range */ + insert_resource(&iomem_resource, res); + } + + if (xen_create_contiguous_region((unsigned long)&vmcoreinfo_note, + get_order(sizeof(vmcoreinfo_note)), + BITS_PER_LONG)) + goto err; +#endif + + return; + + err: + /* + * It isn't possible to free xen_phys_cpus this early in the + * boot. Failure at this stage is unexpected and the amount of + * memory is small therefore we tolerate the potential leak. 
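+	 * (at most the k * sizeof(struct resource) bootmem allocation
+	 * made above for xen_phys_cpus).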
+ */ + xen_max_nr_phys_cpus = 0; + return; +} + +#ifndef CONFIG_X86 +void __init xen_machine_kexec_register_resources(struct resource *res) +{ + int k; + struct resource *r; + + request_resource(res, &xen_hypervisor_res); + for (k = 0; k < xen_max_nr_phys_cpus; k++) { + r = xen_phys_cpus + k; + if (r->parent == NULL) /* out of xen_hypervisor_res range */ + request_resource(res, r); + } + machine_kexec_register_resources(res); +} +#endif + +static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) +{ + machine_kexec_setup_load_arg(xki, image); + + xki->indirection_page = image->head; + xki->start_address = image->start; +} + +/* + * Load the image into xen so xen can kdump itself + * This might have been done in prepare, but prepare + * is currently called too early. It might make sense + * to move prepare, but for now, just add an extra hook. + */ +int xen_machine_kexec_load(struct kimage *image) +{ + xen_kexec_load_t xkl; + + memset(&xkl, 0, sizeof(xkl)); + xkl.type = image->type; + setup_load_arg(&xkl.image, image); + return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl); +} + +/* + * Unload the image that was stored by machine_kexec_load() + * This might have been done in machine_kexec_cleanup() but it + * is called too late, and its possible xen could try and kdump + * using resources that have been freed. + */ +void xen_machine_kexec_unload(struct kimage *image) +{ + xen_kexec_load_t xkl; + + memset(&xkl, 0, sizeof(xkl)); + xkl.type = image->type; + WARN_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl)); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + * + * This has the hypervisor move to the prefered reboot CPU, + * stop all CPUs and kexec. That is it combines machine_shutdown() + * and machine_kexec() in Linux kexec terms. + */ +NORET_TYPE void machine_kexec(struct kimage *image) +{ + xen_kexec_exec_t xke; + + memset(&xke, 0, sizeof(xke)); + xke.type = image->type; + VOID(HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke)); + panic("KEXEC_CMD_kexec hypercall should not return\n"); +} + +#ifdef CONFIG_X86 +unsigned long paddr_vmcoreinfo_note(void) +{ + return virt_to_machine(&vmcoreinfo_note); +} +#endif + +void machine_shutdown(void) +{ + /* do nothing */ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* The kernel is broken so disable interrupts */ + local_irq_disable(); +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- linux-ec2-2.6.31.orig/drivers/xen/core/features.c +++ linux-ec2-2.6.31/drivers/xen/core/features.c @@ -0,0 +1,34 @@ +/****************************************************************************** + * features.c + * + * Xen feature flags. + * + * Copyright (c) 2006, Ian Campbell, XenSource Inc. + */ +#include +#include +#include +#include +#include + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; +/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. 
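+ * (the feature-test helpers read this array directly, so any module
+ * using them needs access to the symbol).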
+ */
+EXPORT_SYMBOL(xen_features);
+
+void xen_setup_features(void)
+{
+	xen_feature_info_t fi;
+	int i, j;
+
+	for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
+		fi.submap_idx = i;
+		if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+			break;
+		for (j=0; j<32; j++)
+			xen_features[i*32+j] = !!(fi.submap & 1<<j);
+	}
+}
--- linux-ec2-2.6.31.orig/drivers/xen/core/domctl.c
+++ linux-ec2-2.6.31/drivers/xen/core/domctl.c
+/*
+ * xen/interface/domctl.h cannot be included here directly: its
+ * interface_version changes between hypervisor releases while this
+ * code has to work against several of them, so the few definitions
+ * needed are copied locally below.
+ */
+
+#include
+#include
+#include
+
+#include "domctl.h"
+
+/* stuff copied from xen/interface/domctl.h, which we can't
+ * include directly for the reasons outlined above .... */
+
+#define XEN_DOMCTL_get_address_size 36
+typedef struct xen_domctl_address_size {
+	uint32_t size;
+} xen_domctl_address_size_t;
+
+typedef __attribute__((aligned(8))) uint64_t uint64_aligned_t;
+
+union xen_domctl {
+	/* v4: sles10 sp1: xen 3.0.4 + 32-on-64 patches */
+	struct {
+		uint32_t cmd;
+		uint32_t interface_version;
+		domid_t  domain;
+		union {
+			/* left out lots of other struct xen_domctl_foobar */
+			struct xen_domctl_address_size address_size;
+			uint64_t dummy_align;
+			uint8_t  dummy_pad[128];
+		} u;
+	} v4;
+
+	/* v5: upstream: xen 3.1 */
+	struct {
+		uint32_t cmd;
+		uint32_t interface_version;
+		domid_t  domain;
+		union {
+			struct xen_domctl_address_size address_size;
+			uint64_aligned_t dummy_align;
+			uint8_t dummy_pad[128];
+		} u;
+	} v5;
+};
+
+/* The actual code comes here */
+
+static inline int hypervisor_domctl(void *domctl)
+{
+	return _hypercall1(int, domctl, domctl);
+}
+
+int xen_guest_address_size(int domid)
+{
+	union xen_domctl domctl;
+	int low, ret;
+
+#define guest_address_size(ver) do {					\
+	memset(&domctl, 0, sizeof(domctl));				\
+	domctl.v##ver.cmd = XEN_DOMCTL_get_address_size;		\
+	domctl.v##ver.interface_version = low = ver;			\
+	domctl.v##ver.domain = domid;					\
+	ret = hypervisor_domctl(&domctl) ?: domctl.v##ver.u.address_size.size; \
+	if (ret == 32 || ret == 64) {					\
+		printk("v" #ver " domctl worked ok: dom%d is %d-bit\n",	\
+		       domid, ret);					\
+		return ret;						\
+	}								\
+} while (0)
+
+	guest_address_size(5);
+#if CONFIG_XEN_COMPAT < 0x030100
+	guest_address_size(4);
+#endif
+
+	ret = BITS_PER_LONG;
+	printk("v%d...5 domctls failed, assuming dom%d is native: %d\n",
+	       low, domid, ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xen_guest_address_size);
+
+int xen_guest_blkif_protocol(int domid)
+{
+	int address_size = xen_guest_address_size(domid);
+
+	if (address_size == BITS_PER_LONG)
+		return BLKIF_PROTOCOL_NATIVE;
+	if (address_size == 32)
+		return BLKIF_PROTOCOL_X86_32;
+	if (address_size == 64)
+		return BLKIF_PROTOCOL_X86_64;
+	return BLKIF_PROTOCOL_NATIVE;
+}
+EXPORT_SYMBOL_GPL(xen_guest_blkif_protocol);
--- linux-ec2-2.6.31.orig/drivers/xen/core/domctl.h
+++ linux-ec2-2.6.31/drivers/xen/core/domctl.h
@@ -0,0 +1,2 @@
+int xen_guest_address_size(int domid);
+int xen_guest_blkif_protocol(int domid);
--- linux-ec2-2.6.31.orig/drivers/xen/core/smpboot.c
+++ linux-ec2-2.6.31/drivers/xen/core/smpboot.c
@@ -0,0 +1,476 @@
+/*
+ *	Xen SMP booting functions
+ *
+ *	See arch/i386/kernel/smpboot.c for copyright and credits for derived
+ *	portions of this file.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int local_setup_timer(unsigned int cpu); +extern void local_teardown_timer(unsigned int cpu); + +extern void hypervisor_callback(void); +extern void failsafe_callback(void); +extern void system_call(void); +extern void smp_trap_init(trap_info_t *); + +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; + +cpumask_var_t vcpu_initialized_mask; + +DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); + +static int __read_mostly resched_irq = -1; +static int __read_mostly callfunc_irq = -1; +static int __read_mostly call1func_irq = -1; +static int __read_mostly reboot_irq = -1; + +#ifdef CONFIG_X86_LOCAL_APIC +#define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid)) +#else +#define set_cpu_to_apicid(cpu, apicid) +#endif + +DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); + +void __init prefill_possible_map(void) +{ + int i, rc; + + for_each_possible_cpu(i) + if (i != smp_processor_id()) + return; + + for (i = 0; i < NR_CPUS; i++) { + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) { + set_cpu_possible(i, true); + nr_cpu_ids = i + 1; + } + } +} + +static inline void +set_cpu_sibling_map(unsigned int cpu) +{ + cpu_data(cpu).phys_proc_id = cpu; + cpu_data(cpu).cpu_core_id = 0; + + cpumask_copy(cpu_sibling_mask(cpu), cpumask_of(cpu)); + cpumask_copy(cpu_core_mask(cpu), cpumask_of(cpu)); + + cpu_data(cpu).booted_cores = 1; +} + +static void +remove_siblinginfo(unsigned int cpu) +{ + cpu_data(cpu).phys_proc_id = BAD_APICID; + cpu_data(cpu).cpu_core_id = BAD_APICID; + + cpumask_clear(cpu_sibling_mask(cpu)); + cpumask_clear(cpu_core_mask(cpu)); + + cpu_data(cpu).booted_cores = 0; +} + +static int __cpuinit xen_smp_intr_init(unsigned int cpu) +{ + static struct irqaction resched_action = { + .handler = smp_reschedule_interrupt, + .flags = IRQF_DISABLED, + .name = "resched" + }, callfunc_action = { + .handler = smp_call_function_interrupt, + .flags = IRQF_DISABLED, + .name = "callfunc" + }, call1func_action = { + .handler = smp_call_function_single_interrupt, + .flags = IRQF_DISABLED, + .name = "call1func" + }, reboot_action = { + .handler = smp_reboot_interrupt, + .flags = IRQF_DISABLED, + .name = "reboot" + }; + int rc; + + rc = bind_ipi_to_irqaction(RESCHEDULE_VECTOR, + cpu, + &resched_action); + if (rc < 0) + return rc; + if (resched_irq < 0) + resched_irq = rc; + else + BUG_ON(resched_irq != rc); + + rc = bind_ipi_to_irqaction(CALL_FUNCTION_VECTOR, + cpu, + &callfunc_action); + if (rc < 0) + goto unbind_resched; + if (callfunc_irq < 0) + callfunc_irq = rc; + else + BUG_ON(callfunc_irq != rc); + + rc = bind_ipi_to_irqaction(CALL_FUNC_SINGLE_VECTOR, + cpu, + &call1func_action); + if (rc < 0) + goto unbind_call; + if (call1func_irq < 0) + call1func_irq = rc; + else + BUG_ON(call1func_irq != rc); + + rc = bind_ipi_to_irqaction(REBOOT_VECTOR, + cpu, + &reboot_action); + if (rc < 0) + goto unbind_call1; + if (reboot_irq < 0) + reboot_irq = rc; + else + BUG_ON(reboot_irq != rc); + + rc = xen_spinlock_init(cpu); + if (rc < 0) + goto unbind_reboot; + + if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0)) + goto fail; + + return 0; + + fail: + xen_spinlock_cleanup(cpu); + unbind_reboot: + unbind_from_per_cpu_irq(reboot_irq, cpu, NULL); + 
unbind_call1:
+	unbind_from_per_cpu_irq(call1func_irq, cpu, NULL);
+ unbind_call:
+	unbind_from_per_cpu_irq(callfunc_irq, cpu, NULL);
+ unbind_resched:
+	unbind_from_per_cpu_irq(resched_irq, cpu, NULL);
+	return rc;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __cpuinit xen_smp_intr_exit(unsigned int cpu)
+{
+	if (cpu != 0)
+		local_teardown_timer(cpu);
+
+	unbind_from_per_cpu_irq(resched_irq, cpu, NULL);
+	unbind_from_per_cpu_irq(callfunc_irq, cpu, NULL);
+	unbind_from_per_cpu_irq(call1func_irq, cpu, NULL);
+	unbind_from_per_cpu_irq(reboot_irq, cpu, NULL);
+	xen_spinlock_cleanup(cpu);
+}
+#endif
+
+static void __cpuinit cpu_bringup(void)
+{
+	cpu_init();
+	identify_secondary_cpu(&current_cpu_data);
+	touch_softlockup_watchdog();
+	preempt_disable();
+	local_irq_enable();
+}
+
+static void __cpuinit cpu_bringup_and_idle(void)
+{
+	cpu_bringup();
+	cpu_idle();
+}
+
+static void __cpuinit cpu_initialize_context(unsigned int cpu)
+{
+	/* vcpu_guest_context_t is too large to allocate on the stack.
+	 * Hence we allocate statically and protect it with a lock */
+	static vcpu_guest_context_t ctxt;
+	static DEFINE_SPINLOCK(ctxt_lock);
+
+	struct task_struct *idle = idle_task(cpu);
+
+	if (cpumask_test_and_set_cpu(cpu, vcpu_initialized_mask))
+		return;
+
+	spin_lock(&ctxt_lock);
+
+	memset(&ctxt, 0, sizeof(ctxt));
+
+	ctxt.flags = VGCF_IN_KERNEL;
+	ctxt.user_regs.ds = __USER_DS;
+	ctxt.user_regs.es = __USER_DS;
+	ctxt.user_regs.fs = 0;
+	ctxt.user_regs.gs = 0;
+	ctxt.user_regs.ss = __KERNEL_DS;
+	ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+	ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
+
+	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+	smp_trap_init(ctxt.trap_ctxt);
+
+	ctxt.ldt_ents = 0;
+	ctxt.gdt_frames[0] = arbitrary_virt_to_mfn(get_cpu_gdt_table(cpu));
+	ctxt.gdt_ents = GDT_SIZE / 8;
+
+	ctxt.user_regs.cs = __KERNEL_CS;
+	ctxt.user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+
+	ctxt.kernel_ss = __KERNEL_DS;
+	ctxt.kernel_sp = idle->thread.sp0;
+
+	ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+#ifdef __i386__
+	ctxt.event_callback_cs = __KERNEL_CS;
+	ctxt.failsafe_callback_cs = __KERNEL_CS;
+
+	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+
+	ctxt.user_regs.fs = __KERNEL_PERCPU;
+	ctxt.user_regs.gs = __KERNEL_STACK_CANARY;
+#else /* __x86_64__ */
+	ctxt.syscall_callback_eip = (unsigned long)system_call;
+
+	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
+
+	ctxt.gs_base_kernel = per_cpu_offset(cpu);
+#endif
+
+	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
+		BUG();
+
+	spin_unlock(&ctxt_lock);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int cpu;
+	struct task_struct *idle;
+	int apicid;
+	struct vcpu_get_physid cpu_id;
+	void *gdt_addr;
+
+	apicid = 0;
+	if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
+		apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
+	boot_cpu_data.apicid = apicid;
+	cpu_data(0) = boot_cpu_data;
+
+	set_cpu_to_apicid(0, apicid);
+
+	current_thread_info()->cpu = 0;
+
+	for_each_possible_cpu (cpu) {
+		alloc_cpumask_var(&per_cpu(cpu_sibling_map, cpu), GFP_KERNEL);
+		alloc_cpumask_var(&per_cpu(cpu_core_map, cpu), GFP_KERNEL);
+		cpumask_clear(cpu_sibling_mask(cpu));
+		cpumask_clear(cpu_core_mask(cpu));
+	}
+
+	set_cpu_sibling_map(0);
+
+	if (xen_smp_intr_init(0))
+		BUG();
+
+	if (!alloc_cpumask_var(&vcpu_initialized_mask, GFP_KERNEL))
+		BUG();
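+
+	/* Only the boot vCPU is running at this point; seed the mask with
+	 * it alone before the secondary vCPU contexts are initialized. */
+	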
cpumask_copy(vcpu_initialized_mask, cpumask_of(0)); + + /* Restrict the possible_map according to max_cpus. */ + while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { + for (cpu = nr_cpu_ids-1; !cpumask_test_cpu(cpu, cpu_possible_mask); cpu--) + continue; + set_cpu_possible(cpu, false); + } + + for_each_possible_cpu (cpu) { + if (cpu == 0) + continue; + + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + + gdt_addr = get_cpu_gdt_table(cpu); + make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables); + + apicid = cpu; + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); + cpu_data(cpu) = boot_cpu_data; + cpu_data(cpu).cpu_index = cpu; + cpu_data(cpu).apicid = apicid; + + set_cpu_to_apicid(cpu, apicid); + +#ifdef __x86_64__ + clear_tsk_thread_flag(idle, TIF_FORK); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; +#endif + per_cpu(current_task, cpu) = idle; + + irq_ctx_init(cpu); + +#ifdef CONFIG_HOTPLUG_CPU + if (is_initial_xendomain()) +#endif + set_cpu_present(cpu, true); + } + + init_xenbus_allowed_cpumask(); + +#ifdef CONFIG_X86_IO_APIC + /* + * Here we can be sure that there is an IO-APIC in the system. Let's + * go and set it up: + */ + if (cpu_has_apic && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +#endif +} + +void __init smp_prepare_boot_cpu(void) +{ + unsigned int cpu; + + switch_to_new_gdt(smp_processor_id()); + prefill_possible_map(); + for_each_possible_cpu(cpu) + if (cpu != smp_processor_id()) + setup_vcpu_info(cpu); +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Initialize cpu_present_map late to skip SMP boot code in init/main.c. + * But do it early enough to catch critical for_each_present_cpu() loops + * in i386-specific code. 
+ */ +static int __init initialize_cpu_present_map(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + set_cpu_present(cpu, true); + + return 0; +} +core_initcall(initialize_cpu_present_map); + +int __cpuexit __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + + if (cpu == 0) + return -EBUSY; + + remove_siblinginfo(cpu); + + set_cpu_online(cpu, false); + fixup_irqs(); + + return 0; +} + +void __cpuinit __cpu_die(unsigned int cpu) +{ + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); + } + + xen_smp_intr_exit(cpu); + + if (num_online_cpus() == 1) + alternatives_smp_switch(0); +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +int __cpuinit __cpu_up(unsigned int cpu) +{ + int rc; + + rc = cpu_up_check(cpu); + if (rc) + return rc; + + cpu_initialize_context(cpu); + + if (num_online_cpus() == 1) + alternatives_smp_switch(1); + + /* This must be done before setting cpu_online_map */ + set_cpu_sibling_map(cpu); + wmb(); + + rc = xen_smp_intr_init(cpu); + if (rc) { + remove_siblinginfo(cpu); + return rc; + } + + set_cpu_online(cpu, true); + + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + BUG_ON(rc); + + return 0; +} + +void __ref play_dead(void) +{ + idle_task_exit(); + local_irq_disable(); + cpumask_clear_cpu(smp_processor_id(), cpu_initialized_mask); + preempt_enable_no_resched(); + VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL)); +#ifdef CONFIG_HOTPLUG_CPU + cpu_bringup(); +#else + BUG(); +#endif +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +#ifndef CONFIG_X86_LOCAL_APIC +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} +#endif --- linux-ec2-2.6.31.orig/drivers/xen/core/evtchn.c +++ linux-ec2-2.6.31/drivers/xen/core/evtchn.c @@ -0,0 +1,1829 @@ +/****************************************************************************** + * evtchn.c + * + * Communication via Xen event channels. + * + * Copyright (c) 2002-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* RTC_IRQ */ + +/* + * This lock protects updates to the following mapping and reference-count + * arrays. The lock does not need to be acquired to read the mapping tables. + */ +static DEFINE_SPINLOCK(irq_mapping_update_lock); + +/* IRQ <-> event-channel mappings. */ +static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 }; + +#if defined(CONFIG_SMP) && defined(CONFIG_X86) +static struct per_cpu_irqaction { + struct irqaction action; /* must be first */ + struct per_cpu_irqaction *next; + cpumask_t cpus; +} *virq_actions[NR_VIRQS]; +/* IRQ <-> VIRQ mapping. */ +static DECLARE_BITMAP(virq_per_cpu, NR_VIRQS) __read_mostly; +static DEFINE_PER_CPU(int[NR_VIRQS], virq_to_evtchn); +#define BUG_IF_VIRQ_PER_CPU(irq) \ + BUG_ON(type_from_irq(irq) == IRQT_VIRQ \ + && test_bit(index_from_irq(irq), virq_per_cpu)) +#else +#define BUG_IF_VIRQ_PER_CPU(irq) ((void)(irq)) +#define PER_CPU_VIRQ_IRQ +#endif + +/* IRQ <-> IPI mapping. */ +#ifndef NR_IPIS +#define NR_IPIS 1 +#endif +#if defined(CONFIG_SMP) && defined(CONFIG_X86) +static int ipi_to_irq[NR_IPIS] __read_mostly = {[0 ... NR_IPIS-1] = -1}; +static DEFINE_PER_CPU(int[NR_IPIS], ipi_to_evtchn); +#else +#define PER_CPU_IPI_IRQ +#endif +#if !defined(CONFIG_SMP) || !defined(PER_CPU_IPI_IRQ) +#define BUG_IF_IPI(irq) BUG_ON(type_from_irq(irq) == IRQT_IPI) +#else +#define BUG_IF_IPI(irq) ((void)(irq)) +#endif + +/* Binding types. */ +enum { + IRQT_UNBOUND, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_LOCAL_PORT, + IRQT_CALLER_PORT, + _IRQT_COUNT +}; + +#define _IRQT_BITS 4 +#define _EVTCHN_BITS 12 +#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS) + +/* Convenient shorthand for packed representation of an unbound IRQ. */ +#define IRQ_UNBOUND (IRQT_UNBOUND << (32 - _IRQT_BITS)) + +static struct irq_cfg _irq_cfg[] = { + [0 ... +#ifdef CONFIG_SPARSE_IRQ + BUILD_BUG_ON_ZERO(PIRQ_BASE) + NR_IRQS_LEGACY +#else + NR_IRQS +#endif + - 1].info = IRQ_UNBOUND +}; + +static inline struct irq_cfg *__pure irq_cfg(unsigned int irq) +{ +#ifdef CONFIG_SPARSE_IRQ + struct irq_desc *desc = irq_to_desc(irq); + + return desc ? desc->chip_data : NULL; +#else + return irq < NR_IRQS ? _irq_cfg + irq : NULL; +#endif +} + +/* Constructor for packed IRQ information. */ +static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn) +{ + BUILD_BUG_ON(_IRQT_COUNT > (1U << _IRQT_BITS)); + + BUILD_BUG_ON(NR_PIRQS > (1U << _INDEX_BITS)); + BUILD_BUG_ON(NR_VIRQS > (1U << _INDEX_BITS)); + BUILD_BUG_ON(NR_IPIS > (1U << _INDEX_BITS)); + BUG_ON(index >> _INDEX_BITS); + + BUILD_BUG_ON(NR_EVENT_CHANNELS > (1U << _EVTCHN_BITS)); + + return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn); +} + +/* + * Accessors for packed IRQ information. + */ + +static inline unsigned int index_from_irq(int irq) +{ + const struct irq_cfg *cfg = irq_cfg(irq); + + return cfg ? (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1) + : 0; +} + +static inline unsigned int type_from_irq(int irq) +{ + const struct irq_cfg *cfg = irq_cfg(irq); + + return cfg ? 
cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND; +} + +static inline unsigned int evtchn_from_per_cpu_irq(unsigned int irq, + unsigned int cpu) +{ + switch (type_from_irq(irq)) { +#ifndef PER_CPU_VIRQ_IRQ + case IRQT_VIRQ: + return per_cpu(virq_to_evtchn, cpu)[index_from_irq(irq)]; +#endif +#ifndef PER_CPU_IPI_IRQ + case IRQT_IPI: + return per_cpu(ipi_to_evtchn, cpu)[index_from_irq(irq)]; +#endif + } + BUG(); + return 0; +} + +static inline unsigned int evtchn_from_irq(unsigned int irq) +{ + const struct irq_cfg *cfg; + + switch (type_from_irq(irq)) { +#ifndef PER_CPU_VIRQ_IRQ + case IRQT_VIRQ: +#endif +#ifndef PER_CPU_IPI_IRQ + case IRQT_IPI: +#endif + return evtchn_from_per_cpu_irq(irq, smp_processor_id()); + } + cfg = irq_cfg(irq); + return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0; +} + +/* IRQ <-> VIRQ mapping. */ +DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; + +#if defined(CONFIG_SMP) && defined(PER_CPU_IPI_IRQ) +/* IRQ <-> IPI mapping. */ +DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1}; +#endif + +#ifdef CONFIG_SMP + +static u8 cpu_evtchn[NR_EVENT_CHANNELS]; +static DEFINE_PER_CPU(unsigned long[BITS_TO_LONGS(NR_EVENT_CHANNELS)], + cpu_evtchn_mask); + +static inline unsigned long active_evtchns(unsigned int idx) +{ + shared_info_t *sh = HYPERVISOR_shared_info; + + return (sh->evtchn_pending[idx] & + percpu_read(cpu_evtchn_mask[idx]) & + ~sh->evtchn_mask[idx]); +} + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + shared_info_t *s = HYPERVISOR_shared_info; + int irq = evtchn_to_irq[chn]; + + BUG_ON(!test_bit(chn, s->evtchn_mask)); + + if (irq != -1) { + struct irq_desc *desc = irq_to_desc(irq); + + if (!(desc->status & IRQ_PER_CPU)) + cpumask_copy(desc->affinity, cpumask_of(cpu)); + else + cpumask_set_cpu(cpu, desc->affinity); + } + + clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_evtchn[chn])); + set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); + cpu_evtchn[chn] = cpu; +} + +static void init_evtchn_cpu_bindings(void) +{ + int i; + + /* By default all event channels notify CPU#0. */ + for (i = 0; i < nr_irqs; i++) { + struct irq_desc *desc = irq_to_desc(i); + + if (desc) + cpumask_copy(desc->affinity, cpumask_of(0)); + } + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(per_cpu(cpu_evtchn_mask, 0), ~0, sizeof(per_cpu(cpu_evtchn_mask, 0))); +} + +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + return cpu_evtchn[evtchn]; +} + +#else + +static inline unsigned long active_evtchns(unsigned int idx) +{ + shared_info_t *sh = HYPERVISOR_shared_info; + + return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); +} + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ +} + +static void init_evtchn_cpu_bindings(void) +{ +} + +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + return 0; +} + +#endif + +#ifdef CONFIG_X86 +void __init xen_init_IRQ(void); +void __init init_IRQ(void) +{ + irq_ctx_init(0); + xen_init_IRQ(); +} +#include +#endif + +/* Xen will never allocate port zero for any purpose. */ +#define VALID_EVTCHN(chn) ((chn) != 0) + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + VOID(HYPERVISOR_xen_version(0, NULL)); +} +/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. 
 */
+EXPORT_SYMBOL(force_evtchn_callback);
+
+static DEFINE_PER_CPU(unsigned int, upcall_count) = { 0 };
+static DEFINE_PER_CPU(unsigned int, last_processed_l1i) = { BITS_PER_LONG - 1 };
+static DEFINE_PER_CPU(unsigned int, last_processed_l2i) = { BITS_PER_LONG - 1 };
+
+#ifndef vcpu_info_xchg
+#define vcpu_info_xchg(fld, val) xchg(&current_vcpu_info()->fld, val)
+#endif
+
+#ifndef percpu_xadd
+#define percpu_xadd(var, val)					\
+({								\
+	typeof(per_cpu_var(var)) __tmp_var__;			\
+	unsigned long flags;					\
+	local_irq_save(flags);					\
+	__tmp_var__ = get_cpu_var(var);				\
+	__get_cpu_var(var) += (val);				\
+	put_cpu_var(var);					\
+	local_irq_restore(flags);				\
+	__tmp_var__;						\
+})
+#endif
+
+/* NB. Interrupts are disabled on entry. */
+asmlinkage void __irq_entry evtchn_do_upcall(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	unsigned long l1, l2;
+	unsigned long masked_l1, masked_l2;
+	unsigned int l1i, l2i, port, count;
+	int irq;
+
+	exit_idle();
+	irq_enter();
+
+	do {
+		/* Avoid a callback storm when we reenable delivery. */
+		vcpu_info_write(evtchn_upcall_pending, 0);
+
+		/* Nested invocations bail immediately. */
+		if (unlikely(percpu_xadd(upcall_count, 1)))
+			break;
+
+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+		/* Clear master flag /before/ clearing selector flag. */
+		wmb();
+#else
+		barrier();
+#endif
+		l1 = vcpu_info_xchg(evtchn_pending_sel, 0);
+
+		l1i = percpu_read(last_processed_l1i);
+		l2i = percpu_read(last_processed_l2i);
+
+		while (l1 != 0) {
+
+			l1i = (l1i + 1) % BITS_PER_LONG;
+			masked_l1 = l1 & ((~0UL) << l1i);
+
+			if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */
+				l1i = BITS_PER_LONG - 1;
+				l2i = BITS_PER_LONG - 1;
+				continue;
+			}
+			l1i = __ffs(masked_l1);
+
+			do {
+				l2 = active_evtchns(l1i);
+
+				l2i = (l2i + 1) % BITS_PER_LONG;
+				masked_l2 = l2 & ((~0UL) << l2i);
+
+				if (masked_l2 == 0) { /* if we masked out all events, move on */
+					l2i = BITS_PER_LONG - 1;
+					break;
+				}
+
+				l2i = __ffs(masked_l2);
+
+				/* process port */
+				port = (l1i * BITS_PER_LONG) + l2i;
+				if (unlikely((irq = evtchn_to_irq[port]) == -1))
+					evtchn_device_upcall(port);
+				else if (!handle_irq(irq, regs) && printk_ratelimit())
+					printk(KERN_EMERG "%s(%d): No handler for irq %d\n",
+					       __func__, smp_processor_id(), irq);
+
+				/* if this is the final port processed, we'll pick up here+1 next time */
+				percpu_write(last_processed_l1i, l1i);
+				percpu_write(last_processed_l2i, l2i);
+
+			} while (l2i != BITS_PER_LONG - 1);
+
+			l2 = active_evtchns(l1i);
+			if (l2 == 0) /* we handled all ports, so we can clear the selector bit */
+				l1 &= ~(1UL << l1i);
+
+		}
+
+		/* If there were nested callbacks then we have more to do.
*/ + count = percpu_read(upcall_count); + percpu_write(upcall_count, 0); + } while (unlikely(count != 1)); + + irq_exit(); + set_irq_regs(old_regs); +} + +static struct irq_chip dynirq_chip; + +static int find_unbound_irq(unsigned int cpu, bool percpu) +{ + static int warned; + int irq; + + for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++) { + struct irq_desc *desc = irq_to_desc_alloc_node(irq, cpu_to_node(cpu)); + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->bindcount) { + irq_flow_handler_t handle; + const char *name; + + desc->status |= IRQ_NOPROBE; + if (!percpu) { + handle = handle_level_irq; + name = "level"; + } else { + handle = handle_percpu_irq; + name = "percpu"; + } + set_irq_chip_and_handler_name(irq, &dynirq_chip, + handle, name); + return irq; + } + } + + if (!warned) { + warned = 1; + printk(KERN_WARNING "No available IRQ to bind to: " + "increase NR_DYNIRQS.\n"); + } + + return -ENOSPC; +} + +static int bind_caller_port_to_irq(unsigned int caller_port) +{ + int irq; + + spin_lock(&irq_mapping_update_lock); + + if ((irq = evtchn_to_irq[caller_port]) == -1) { + if ((irq = find_unbound_irq(smp_processor_id(), false)) < 0) + goto out; + + evtchn_to_irq[caller_port] = irq; + irq_cfg(irq)->info = mk_irq_info(IRQT_CALLER_PORT, + 0, caller_port); + } + + irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + +static int bind_local_port_to_irq(unsigned int local_port) +{ + int irq; + + spin_lock(&irq_mapping_update_lock); + + BUG_ON(evtchn_to_irq[local_port] != -1); + + if ((irq = find_unbound_irq(smp_processor_id(), false)) < 0) { + struct evtchn_close close = { .port = local_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + BUG(); + goto out; + } + + evtchn_to_irq[local_port] = irq; + irq_cfg(irq)->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); + irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + +static int bind_listening_port_to_irq(unsigned int remote_domain) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + + return err ? : bind_local_port_to_irq(alloc_unbound.port); +} + +static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + unsigned int remote_port) +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; + + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + + return err ? 
: bind_local_port_to_irq(bind_interdomain.local_port); +} + +static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { + if ((irq = find_unbound_irq(cpu, false)) < 0) + goto out; + + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + evtchn_to_irq[evtchn] = irq; +#ifndef PER_CPU_VIRQ_IRQ + { + unsigned int cpu; + + for_each_possible_cpu(cpu) + per_cpu(virq_to_evtchn, cpu)[virq] = evtchn; + } +#endif + irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + +#if defined(CONFIG_SMP) && defined(PER_CPU_IPI_IRQ) +static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) { + if ((irq = find_unbound_irq(cpu, false)) < 0) + goto out; + + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; + irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn); + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_cfg(irq)->bindcount++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} +#endif + +static void unbind_from_irq(unsigned int irq) +{ + struct evtchn_close close; + unsigned int cpu; + int evtchn = evtchn_from_irq(irq); + + BUG_IF_VIRQ_PER_CPU(irq); + BUG_IF_IPI(irq); + + spin_lock(&irq_mapping_update_lock); + + if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) { + close.port = evtchn; + if ((type_from_irq(irq) != IRQT_CALLER_PORT) && + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + BUG(); + + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; +#ifndef PER_CPU_VIRQ_IRQ + for_each_possible_cpu(cpu) + per_cpu(virq_to_evtchn, cpu) + [index_from_irq(irq)] = 0; +#endif + break; +#if defined(CONFIG_SMP) && defined(PER_CPU_IPI_IRQ) + case IRQT_IPI: + per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; + break; +#endif + default: + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. */ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + irq_cfg(irq)->info = IRQ_UNBOUND; + + /* Zap stats across IRQ changes of use. 
*/ + for_each_possible_cpu(cpu) +#ifdef CONFIG_GENERIC_HARDIRQS + irq_to_desc(irq)->kstat_irqs[cpu] = 0; +#else + kstat_cpu(cpu).irqs[irq] = 0; +#endif + } + + spin_unlock(&irq_mapping_update_lock); +} + +#if defined(CONFIG_SMP) && (!defined(PER_CPU_IPI_IRQ) || !defined(PER_CPU_VIRQ_IRQ)) +void unbind_from_per_cpu_irq(unsigned int irq, unsigned int cpu, + struct irqaction *action) +{ + struct evtchn_close close; + int evtchn = evtchn_from_per_cpu_irq(irq, cpu); + struct irqaction *free_action = NULL; + + spin_lock(&irq_mapping_update_lock); + + if (VALID_EVTCHN(evtchn)) { + struct irq_desc *desc = irq_to_desc(irq); + + mask_evtchn(evtchn); + + BUG_ON(irq_cfg(irq)->bindcount <= 1); + irq_cfg(irq)->bindcount--; + +#ifndef PER_CPU_VIRQ_IRQ + if (type_from_irq(irq) == IRQT_VIRQ) { + unsigned int virq = index_from_irq(irq); + struct per_cpu_irqaction *cur, *prev = NULL; + + cur = virq_actions[virq]; + while (cur) { + if (cur->action.dev_id == action) { + cpu_clear(cpu, cur->cpus); + if (cpus_empty(cur->cpus)) { + if (prev) + prev->next = cur->next; + else + virq_actions[virq] = cur->next; + free_action = action; + } + } else if (cpu_isset(cpu, cur->cpus)) + evtchn = 0; + cur = (prev = cur)->next; + } + if (!VALID_EVTCHN(evtchn)) + goto done; + } +#endif + + cpumask_clear_cpu(cpu, desc->affinity); + + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + BUG(); + + switch (type_from_irq(irq)) { +#ifndef PER_CPU_VIRQ_IRQ + case IRQT_VIRQ: + per_cpu(virq_to_evtchn, cpu)[index_from_irq(irq)] = 0; + break; +#endif +#ifndef PER_CPU_IPI_IRQ + case IRQT_IPI: + per_cpu(ipi_to_evtchn, cpu)[index_from_irq(irq)] = 0; + break; +#endif + default: + BUG(); + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. */ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + } + +#ifndef PER_CPU_VIRQ_IRQ +done: +#endif + spin_unlock(&irq_mapping_update_lock); + + if (free_action) + free_irq(irq, free_action); +} +EXPORT_SYMBOL_GPL(unbind_from_per_cpu_irq); +#endif /* CONFIG_SMP && (!PER_CPU_IPI_IRQ || !PER_CPU_VIRQ_IRQ) */ + +int bind_caller_port_to_irqhandler( + unsigned int caller_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_caller_port_to_irq(caller_port); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_caller_port_to_irqhandler); + +int bind_listening_port_to_irqhandler( + unsigned int remote_domain, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_listening_port_to_irq(remote_domain); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_listening_port_to_irqhandler); + +int bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); + +int 
bind_virq_to_irqhandler( + unsigned int virq, + unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + BUG_IF_VIRQ_PER_CPU(virq); + + irq = bind_virq_to_irq(virq, cpu); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); + +#ifdef CONFIG_SMP +#ifndef PER_CPU_VIRQ_IRQ +int bind_virq_to_irqaction( + unsigned int virq, + unsigned int cpu, + struct irqaction *action) +{ + struct evtchn_bind_virq bind_virq; + int evtchn, irq, retval = 0; + struct per_cpu_irqaction *cur = NULL, *new; + + BUG_ON(!test_bit(virq, virq_per_cpu)); + + if (action->dev_id) + return -EINVAL; + + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (new) { + new->action = *action; + new->action.dev_id = action; + } + + spin_lock(&irq_mapping_update_lock); + + for (cur = virq_actions[virq]; cur; cur = cur->next) + if (cur->action.dev_id == action) + break; + if (!cur) { + if (!new) { + spin_unlock(&irq_mapping_update_lock); + return -ENOMEM; + } + new->next = virq_actions[virq]; + virq_actions[virq] = cur = new; + retval = 1; + } + cpu_set(cpu, cur->cpus); + action = &cur->action; + + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { + unsigned int nr; + + BUG_ON(!retval); + + if ((irq = find_unbound_irq(cpu, true)) < 0) { + if (cur) + virq_actions[virq] = cur->next; + spin_unlock(&irq_mapping_update_lock); + if (cur != new) + kfree(new); + return irq; + } + + /* Extra reference so count will never drop to zero. */ + irq_cfg(irq)->bindcount++; + + for_each_possible_cpu(nr) + per_cpu(virq_to_irq, nr)[virq] = irq; + irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, 0); + } + + evtchn = per_cpu(virq_to_evtchn, cpu)[virq]; + if (!VALID_EVTCHN(evtchn)) { + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + evtchn_to_irq[evtchn] = irq; + per_cpu(virq_to_evtchn, cpu)[virq] = evtchn; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_cfg(irq)->bindcount++; + + spin_unlock(&irq_mapping_update_lock); + + if (cur != new) + kfree(new); + + if (retval == 0) { + unsigned long flags; + + local_irq_save(flags); + unmask_evtchn(evtchn); + local_irq_restore(flags); + } else { + action->flags |= IRQF_PERCPU; + retval = setup_irq(irq, action); + if (retval) { + unbind_from_per_cpu_irq(irq, cpu, cur->action.dev_id); + BUG_ON(retval > 0); + irq = retval; + } + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_virq_to_irqaction); +#endif + +#ifdef PER_CPU_IPI_IRQ +int bind_ipi_to_irqhandler( + unsigned int ipi, + unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_ipi_to_irq(ipi, cpu); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags | IRQF_NO_SUSPEND, + devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +#else +int __cpuinit bind_ipi_to_irqaction( + unsigned int ipi, + unsigned int cpu, + struct irqaction *action) +{ + struct evtchn_bind_ipi bind_ipi; + int evtchn, irq, retval = 0; + + spin_lock(&irq_mapping_update_lock); + + if (VALID_EVTCHN(per_cpu(ipi_to_evtchn, cpu)[ipi])) { + spin_unlock(&irq_mapping_update_lock); + return -EBUSY; + } + + if ((irq = ipi_to_irq[ipi]) == -1) { + if ((irq = find_unbound_irq(cpu, true)) < 0) { + 
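/* No dynamic IRQ slot left for this IPI: drop the new
+			 * action entry and the lock, then bail out. */
+			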
spin_unlock(&irq_mapping_update_lock); + return irq; + } + + /* Extra reference so count will never drop to zero. */ + irq_cfg(irq)->bindcount++; + + ipi_to_irq[ipi] = irq; + irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, 0); + retval = 1; + } + + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + + evtchn = bind_ipi.port; + evtchn_to_irq[evtchn] = irq; + per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; + + bind_evtchn_to_cpu(evtchn, cpu); + + irq_cfg(irq)->bindcount++; + + spin_unlock(&irq_mapping_update_lock); + + if (retval == 0) { + unsigned long flags; + + local_irq_save(flags); + unmask_evtchn(evtchn); + local_irq_restore(flags); + } else { + action->flags |= IRQF_PERCPU | IRQF_NO_SUSPEND; + retval = setup_irq(irq, action); + if (retval) { + unbind_from_per_cpu_irq(irq, cpu, NULL); + BUG_ON(retval > 0); + irq = retval; + } + } + + return irq; +} +#endif /* PER_CPU_IPI_IRQ */ +#endif /* CONFIG_SMP */ + +void unbind_from_irqhandler(unsigned int irq, void *dev_id) +{ + free_irq(irq, dev_id); + unbind_from_irq(irq); +} +EXPORT_SYMBOL_GPL(unbind_from_irqhandler); + +#ifdef CONFIG_SMP +void rebind_evtchn_to_cpu(int port, unsigned int cpu) +{ + struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu }; + int masked; + + masked = test_and_set_evtchn_mask(port); + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv) == 0) + bind_evtchn_to_cpu(port, cpu); + if (!masked) + unmask_evtchn(port); +} + +static void rebind_irq_to_cpu(unsigned int irq, unsigned int tcpu) +{ + int evtchn = evtchn_from_irq(irq); + + BUG_IF_VIRQ_PER_CPU(irq); + BUG_IF_IPI(irq); + + if (VALID_EVTCHN(evtchn)) + rebind_evtchn_to_cpu(evtchn, tcpu); +} + +static int set_affinity_irq(unsigned int irq, const struct cpumask *dest) +{ + rebind_irq_to_cpu(irq, cpumask_first(dest)); + + return 0; +} +#endif + +int resend_irq_on_evtchn(unsigned int irq) +{ + int masked, evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + return 1; + + masked = test_and_set_evtchn_mask(evtchn); + set_evtchn(evtchn); + if (!masked) + unmask_evtchn(evtchn); + + return 1; +} + +/* + * Interface to generic handling in irq.c + */ + +static unsigned int startup_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + unmask_evtchn(evtchn); + return 0; +} + +static void unmask_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + unmask_evtchn(evtchn); +} + +static void mask_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + mask_evtchn(evtchn); +} + +static void ack_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) { + mask_evtchn(evtchn); + clear_evtchn(evtchn); + } +} + +static void end_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn) && !(irq_to_desc(irq)->status & IRQ_DISABLED)) + unmask_evtchn(evtchn); +} + +static struct irq_chip dynirq_chip = { + .name = "Dynamic", + .startup = startup_dynirq, + .shutdown = mask_dynirq, + .disable = mask_dynirq, + .mask = mask_dynirq, + .unmask = unmask_dynirq, + .mask_ack = ack_dynirq, + .ack = ack_dynirq, + .eoi = end_dynirq, + .end = end_dynirq, +#ifdef CONFIG_SMP + .set_affinity = set_affinity_irq, +#endif + .retrigger = resend_irq_on_evtchn, +}; + +/* Bitmap indicating which PIRQs require Xen to be notified on unmask. 
*/ +static bool pirq_eoi_does_unmask; +static unsigned long *pirq_needs_eoi; +static DECLARE_BITMAP(probing_pirq, NR_PIRQS); + +static void pirq_unmask_and_notify(unsigned int evtchn, unsigned int irq) +{ + struct physdev_eoi eoi = { .irq = evtchn_get_xen_pirq(irq) }; + + if (pirq_eoi_does_unmask) { + if (test_bit(eoi.irq, pirq_needs_eoi)) + VOID(HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi)); + else + unmask_evtchn(evtchn); + } else if (test_bit(irq - PIRQ_BASE, pirq_needs_eoi)) { + if (smp_processor_id() != cpu_from_evtchn(evtchn)) { + struct evtchn_unmask unmask = { .port = evtchn }; + struct multicall_entry mcl[2]; + + mcl[0].op = __HYPERVISOR_event_channel_op; + mcl[0].args[0] = EVTCHNOP_unmask; + mcl[0].args[1] = (unsigned long)&unmask; + mcl[1].op = __HYPERVISOR_physdev_op; + mcl[1].args[0] = PHYSDEVOP_eoi; + mcl[1].args[1] = (unsigned long)&eoi; + + if (HYPERVISOR_multicall(mcl, 2)) + BUG(); + } else { + unmask_evtchn(evtchn); + VOID(HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi)); + } + } else + unmask_evtchn(evtchn); +} + +static inline void pirq_query_unmask(int irq) +{ + struct physdev_irq_status_query irq_status; + + if (pirq_eoi_does_unmask) + return; + irq_status.irq = evtchn_get_xen_pirq(irq); + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) + irq_status.flags = 0; + clear_bit(irq - PIRQ_BASE, pirq_needs_eoi); + if (irq_status.flags & XENIRQSTAT_needs_eoi) + set_bit(irq - PIRQ_BASE, pirq_needs_eoi); +} + +static int set_type_pirq(unsigned int irq, unsigned int type) +{ + if (type != IRQ_TYPE_PROBE) + return -EINVAL; + set_bit(irq - PIRQ_BASE, probing_pirq); + return 0; +} + +static unsigned int startup_pirq(unsigned int irq) +{ + struct evtchn_bind_pirq bind_pirq; + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) { + clear_bit(irq - PIRQ_BASE, probing_pirq); + goto out; + } + + bind_pirq.pirq = evtchn_get_xen_pirq(irq); + /* NB. We are happy to share unless we are probing. */ + bind_pirq.flags = test_and_clear_bit(irq - PIRQ_BASE, probing_pirq) + || (irq_to_desc(irq)->status & IRQ_AUTODETECT) + ? 
0 : BIND_PIRQ__WILL_SHARE; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { + if (bind_pirq.flags) + printk(KERN_INFO "Failed to obtain physical IRQ %d\n", + irq); + return 0; + } + evtchn = bind_pirq.port; + + pirq_query_unmask(irq); + + evtchn_to_irq[evtchn] = irq; + bind_evtchn_to_cpu(evtchn, 0); + irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn); + + out: + pirq_unmask_and_notify(evtchn, irq); + + return 0; +} + +static void shutdown_pirq(unsigned int irq) +{ + struct evtchn_close close; + int evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + return; + + mask_evtchn(evtchn); + + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = -1; + irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0); +} + +static void unmask_pirq(unsigned int irq) +{ + startup_pirq(irq); +} + +static void mask_pirq(unsigned int irq) +{ +} + +static void ack_pirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) { + mask_evtchn(evtchn); + clear_evtchn(evtchn); + } +} + +static void end_pirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if ((irq_to_desc(irq)->status & (IRQ_DISABLED|IRQ_PENDING)) == + (IRQ_DISABLED|IRQ_PENDING)) { + shutdown_pirq(irq); + } else if (VALID_EVTCHN(evtchn)) + pirq_unmask_and_notify(evtchn, irq); +} + +static struct irq_chip pirq_chip = { + .name = "Phys", + .startup = startup_pirq, + .shutdown = shutdown_pirq, + .mask = mask_pirq, + .unmask = unmask_pirq, + .mask_ack = ack_pirq, + .ack = ack_pirq, + .end = end_pirq, + .set_type = set_type_pirq, +#ifdef CONFIG_SMP + .set_affinity = set_affinity_irq, +#endif + .retrigger = resend_irq_on_evtchn, +}; + +int irq_ignore_unhandled(unsigned int irq) +{ + struct physdev_irq_status_query irq_status = { .irq = irq }; + + if (!is_running_on_xen()) + return 0; + + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) + return 0; + return !!(irq_status.flags & XENIRQSTAT_shared); +} + +#if defined(CONFIG_SMP) && !defined(PER_CPU_IPI_IRQ) +void notify_remote_via_ipi(unsigned int ipi, unsigned int cpu) +{ + int evtchn = evtchn_from_per_cpu_irq(ipi_to_irq[ipi], cpu); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); +} +#endif + +void notify_remote_via_irq(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + BUG_ON(type_from_irq(irq) == IRQT_VIRQ); + BUG_IF_IPI(irq); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); +} +EXPORT_SYMBOL_GPL(notify_remote_via_irq); + +int multi_notify_remote_via_irq(multicall_entry_t *mcl, int irq) +{ + int evtchn = evtchn_from_irq(irq); + + BUG_ON(type_from_irq(irq) == IRQT_VIRQ); + BUG_IF_IPI(irq); + + if (!VALID_EVTCHN(evtchn)) + return -EINVAL; + + multi_notify_remote_via_evtchn(mcl, evtchn); + return 0; +} +EXPORT_SYMBOL_GPL(multi_notify_remote_via_irq); + +int irq_to_evtchn_port(int irq) +{ + BUG_IF_VIRQ_PER_CPU(irq); + BUG_IF_IPI(irq); + return evtchn_from_irq(irq); +} +EXPORT_SYMBOL_GPL(irq_to_evtchn_port); + +void mask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_set_bit(port, s->evtchn_mask); +} +EXPORT_SYMBOL_GPL(mask_evtchn); + +void unmask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + unsigned int cpu = smp_processor_id(); + + BUG_ON(!irqs_disabled()); + + /* Slow path (hypercall) if this is a non-local port. 
*/ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + VOID(HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask)); + return; + } + + synch_clear_bit(port, s->evtchn_mask); + + /* Did we miss an interrupt 'edge'? Re-fire if so. */ + if (synch_test_bit(port, s->evtchn_pending)) { + vcpu_info_t *vcpu_info = current_vcpu_info(); + + if (!synch_test_and_set_bit(port / BITS_PER_LONG, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } +} +EXPORT_SYMBOL_GPL(unmask_evtchn); + +void disable_all_local_evtchn(void) +{ + unsigned i, cpu = smp_processor_id(); + shared_info_t *s = HYPERVISOR_shared_info; + + for (i = 0; i < NR_EVENT_CHANNELS; ++i) + if (cpu_from_evtchn(i) == cpu) + synch_set_bit(i, &s->evtchn_mask[0]); +} + +/* Clear an irq's pending state, in preparation for polling on it. */ +void xen_clear_irq_pending(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); +} + +/* Set an irq's pending state, to avoid blocking on it. */ +void xen_set_irq_pending(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + set_evtchn(evtchn); +} + +/* Test an irq's pending state. */ +int xen_test_irq_pending(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + return VALID_EVTCHN(evtchn) && test_evtchn(evtchn); +} + +/* Poll waiting for an irq to become pending. In the usual case, the + irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq(int irq) +{ + evtchn_port_t evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn) + && HYPERVISOR_poll_no_timeout(&evtchn, 1)) + BUG(); +} + +#ifdef CONFIG_PM_SLEEP +static void restore_cpu_virqs(unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int virq, irq, evtchn; + + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + +#ifndef PER_CPU_VIRQ_IRQ + if (test_bit(virq, virq_per_cpu) + && !VALID_EVTCHN(per_cpu(virq_to_evtchn, cpu)[virq])) + continue; +#endif + + BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_VIRQ, virq, 0)); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; +#ifdef PER_CPU_VIRQ_IRQ + irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn); +#else + if (test_bit(virq, virq_per_cpu)) + per_cpu(virq_to_evtchn, cpu)[virq] = evtchn; + else { + unsigned int cpu; + + irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, + evtchn); + for_each_possible_cpu(cpu) + per_cpu(virq_to_evtchn, cpu)[virq] = evtchn; + } +#endif + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void restore_cpu_ipis(unsigned int cpu) +{ +#ifdef CONFIG_SMP + struct evtchn_bind_ipi bind_ipi; + int ipi, irq, evtchn; + + for (ipi = 0; ipi < NR_IPIS; ipi++) { +#ifdef PER_CPU_IPI_IRQ + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) +#else + if ((irq = ipi_to_irq[ipi]) == -1 + || !VALID_EVTCHN(per_cpu(ipi_to_evtchn, cpu)[ipi])) +#endif + continue; + + BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_IPI, ipi, 0)); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + /* Record the new mapping. 
*/ + evtchn_to_irq[evtchn] = irq; +#ifdef PER_CPU_IPI_IRQ + irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn); +#else + per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; +#endif + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + if (!(irq_to_desc(irq)->status & IRQ_DISABLED)) + unmask_evtchn(evtchn); + } +#endif +} + +static int evtchn_resume(struct sys_device *dev) +{ + unsigned int cpu, irq, evtchn; + struct irq_cfg *cfg; + struct evtchn_status status; + + /* Avoid doing anything in the 'suspend cancelled' case. */ + status.dom = DOMID_SELF; +#ifdef PER_CPU_VIRQ_IRQ + status.port = evtchn_from_irq(percpu_read(virq_to_irq[VIRQ_TIMER])); +#else + status.port = percpu_read(virq_to_evtchn[VIRQ_TIMER]); +#endif + if (HYPERVISOR_event_channel_op(EVTCHNOP_status, &status)) + BUG(); + if (status.status == EVTCHNSTAT_virq + && status.vcpu == smp_processor_id() + && status.u.virq == VIRQ_TIMER) + return 0; + + init_evtchn_cpu_bindings(); + + if (pirq_eoi_does_unmask) { + struct physdev_pirq_eoi_gmfn eoi_gmfn; + + eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT; + if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn)) + BUG(); + } + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* Check that no PIRQs are still bound. */ + for (irq = PIRQ_BASE; irq < (PIRQ_BASE + nr_pirqs); irq++) { + cfg = irq_cfg(irq); + BUG_ON(cfg && cfg->info != IRQ_UNBOUND); + } + + /* No IRQ <-> event-channel mappings. */ + for (irq = 0; irq < nr_irqs; irq++) { + cfg = irq_cfg(irq); + if (cfg) + cfg->info &= ~((1U << _EVTCHN_BITS) - 1); + } + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + for_each_possible_cpu(cpu) { + restore_cpu_virqs(cpu); + restore_cpu_ipis(cpu); + } + + return 0; +} + +static struct sysdev_class evtchn_sysclass = { + .name = "evtchn", + .resume = evtchn_resume, +}; + +static struct sys_device device_evtchn = { + .id = 0, + .cls = &evtchn_sysclass, +}; + +static int __init evtchn_register(void) +{ + int err; + + if (is_initial_xendomain()) + return 0; + + err = sysdev_class_register(&evtchn_sysclass); + if (!err) + err = sysdev_register(&device_evtchn); + return err; +} +core_initcall(evtchn_register); +#endif + +int __init arch_early_irq_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++) + irq_to_desc(i)->chip_data = _irq_cfg + i; + + return 0; +} + +#ifdef CONFIG_SPARSE_IRQ +int arch_init_chip_data(struct irq_desc *desc, int cpu) +{ + if (!desc->chip_data) { + /* By default all event channels notify CPU#0. 
*/ + cpumask_copy(desc->affinity, cpumask_of(0)); + + desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC); + } + if (!desc->chip_data) { + printk(KERN_ERR "cannot alloc irq_cfg\n"); + BUG(); + } + + return 0; +} +#endif + +#if defined(CONFIG_X86_IO_APIC) +#ifdef CONFIG_SPARSE_IRQ +int nr_pirqs = NR_PIRQS; +EXPORT_SYMBOL_GPL(nr_pirqs); + +int __init arch_probe_nr_irqs(void) +{ + int nr_irqs_gsi, nr = acpi_probe_gsi(); + + if (nr <= NR_IRQS_LEGACY) { + /* for acpi=off or acpi not compiled in */ + int idx; + + for (nr = idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + } + nr_irqs_gsi = max(nr, NR_IRQS_LEGACY); + + nr = nr_irqs_gsi + 8 * nr_cpu_ids; +#ifdef CONFIG_PCI_MSI + nr += nr_irqs_gsi * 16; +#endif + if (nr_pirqs > nr) { + nr_pirqs = nr; + nr_irqs = nr + NR_DYNIRQS; + } + + printk(KERN_DEBUG "nr_irqs_gsi=%d nr_pirqs=%d\n", + nr_irqs_gsi, nr_pirqs); + + return 0; +} +#endif + +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +{ + struct physdev_irq irq_op; + + if (irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs) + return -EINVAL; + + if (cfg->vector) + return 0; + + irq_op.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) + return -ENOSPC; + + cfg->vector = irq_op.vector; + + return 0; +} +#define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE)) +#elif defined(CONFIG_X86) +#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < NR_IRQS_LEGACY) +#else +#define identity_mapped_irq(irq) (1) +#endif + +void evtchn_register_pirq(int irq) +{ + BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs); + if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND) + return; + irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0); + set_irq_chip_and_handler_name(irq, &pirq_chip, handle_level_irq, + "level"); +} + +int evtchn_map_pirq(int irq, int xen_pirq) +{ + if (irq < 0) { + static DEFINE_SPINLOCK(irq_alloc_lock); + + irq = PIRQ_BASE + nr_pirqs - 1; + spin_lock(&irq_alloc_lock); + do { + struct irq_desc *desc; + struct irq_cfg *cfg; + + if (identity_mapped_irq(irq)) + continue; + desc = irq_to_desc_alloc_node(irq, numa_node_id()); + cfg = desc->chip_data; + if (!index_from_irq(irq)) { + BUG_ON(type_from_irq(irq) != IRQT_UNBOUND); + cfg->info = mk_irq_info(IRQT_PIRQ, + xen_pirq, 0); + break; + } + } while (--irq >= PIRQ_BASE); + spin_unlock(&irq_alloc_lock); + if (irq < PIRQ_BASE) + return -ENOSPC; + set_irq_chip_and_handler_name(irq, &pirq_chip, + handle_level_irq, "level"); + } else if (!xen_pirq) { + if (unlikely(type_from_irq(irq) != IRQT_PIRQ)) + return -EINVAL; + /* + * dynamic_irq_cleanup(irq) would seem to be the correct thing + * here, but cannot be used as we get here also during shutdown + * when a driver didn't free_irq() its MSI(-X) IRQ(s), which + * then causes a warning in dynamic_irq_cleanup(). + */ + set_irq_chip_and_handler(irq, NULL, NULL); + irq_cfg(irq)->info = IRQ_UNBOUND; + return 0; + } else if (type_from_irq(irq) != IRQT_PIRQ + || index_from_irq(irq) != xen_pirq) { + printk(KERN_ERR "IRQ#%d is already mapped to %d:%u - " + "cannot map to PIRQ#%u\n", + irq, type_from_irq(irq), index_from_irq(irq), xen_pirq); + return -EINVAL; + } + return index_from_irq(irq) ? 
irq : -EINVAL; +} + +int evtchn_get_xen_pirq(int irq) +{ + if (identity_mapped_irq(irq)) + return irq; + BUG_ON(type_from_irq(irq) != IRQT_PIRQ); + return index_from_irq(irq); +} + +void __init xen_init_IRQ(void) +{ + unsigned int i; + struct physdev_pirq_eoi_gmfn eoi_gmfn; + +#ifndef PER_CPU_VIRQ_IRQ + __set_bit(VIRQ_TIMER, virq_per_cpu); + __set_bit(VIRQ_DEBUG, virq_per_cpu); + __set_bit(VIRQ_XENOPROF, virq_per_cpu); +#ifdef CONFIG_IA64 + __set_bit(VIRQ_ITC, virq_per_cpu); +#endif +#endif + + init_evtchn_cpu_bindings(); + + i = get_order(sizeof(unsigned long) * BITS_TO_LONGS(nr_pirqs)); + pirq_needs_eoi = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, i); + BUILD_BUG_ON(NR_PIRQS > PAGE_SIZE * 8); + eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT; + if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0) + pirq_eoi_does_unmask = true; + + /* No event channels are 'live' right now. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); + +#ifndef CONFIG_SPARSE_IRQ + for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) { + irq_to_desc(i)->status |= IRQ_NOPROBE; + set_irq_chip_and_handler_name(i, &dynirq_chip, + handle_level_irq, "level"); + } + + for (i = PIRQ_BASE; i < (PIRQ_BASE + nr_pirqs); i++) { +#else + for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_IRQS_LEGACY); i++) { +#endif + if (!identity_mapped_irq(i)) + continue; + +#ifdef RTC_IRQ + /* If not domain 0, force our RTC driver to fail its probe. */ + if (i - PIRQ_BASE == RTC_IRQ && !is_initial_xendomain()) + continue; +#endif + + set_irq_chip_and_handler_name(i, &pirq_chip, + handle_level_irq, "level"); + } +} --- linux-ec2-2.6.31.orig/drivers/xen/core/spinlock.c +++ linux-ec2-2.6.31/drivers/xen/core/spinlock.c @@ -0,0 +1,162 @@ +/* + * Xen spinlock functions + * + * See arch/x86/xen/smp.c for copyright and credits for derived + * portions of this file. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef TICKET_SHIFT + +static int __read_mostly spinlock_irq = -1; + +struct spinning { + raw_spinlock_t *lock; + unsigned int ticket; + struct spinning *prev; +}; +static DEFINE_PER_CPU(struct spinning *, spinning); +/* + * Protect removal of objects: Addition can be done lockless, and even + * removal itself doesn't need protection - what needs to be prevented is + * removed objects going out of scope (as they're allocated on the stack. + */ +static DEFINE_PER_CPU(raw_rwlock_t, spinning_rm_lock) = __RAW_RW_LOCK_UNLOCKED; + +int __cpuinit xen_spinlock_init(unsigned int cpu) +{ + static struct irqaction spinlock_action = { + .handler = smp_reschedule_interrupt, + .flags = IRQF_DISABLED, + .name = "spinlock" + }; + int rc; + + rc = bind_ipi_to_irqaction(SPIN_UNLOCK_VECTOR, + cpu, + &spinlock_action); + if (rc < 0) + return rc; + + if (spinlock_irq < 0) { + disable_irq(rc); /* make sure it's never delivered */ + spinlock_irq = rc; + } else + BUG_ON(spinlock_irq != rc); + + return 0; +} + +void __cpuinit xen_spinlock_cleanup(unsigned int cpu) +{ + unbind_from_per_cpu_irq(spinlock_irq, cpu, NULL); +} + +int xen_spin_wait(raw_spinlock_t *lock, unsigned int token) +{ + int rc = 0, irq = spinlock_irq; + raw_rwlock_t *rm_lock; + unsigned long flags; + struct spinning spinning; + + /* If kicker interrupt not initialized yet, just spin. 
*/ + if (unlikely(irq < 0) || unlikely(!cpu_online(raw_smp_processor_id()))) + return 0; + + token >>= TICKET_SHIFT; + + /* announce we're spinning */ + spinning.ticket = token; + spinning.lock = lock; + spinning.prev = percpu_read(spinning); + smp_wmb(); + percpu_write(spinning, &spinning); + + /* clear pending */ + xen_clear_irq_pending(irq); + + do { + /* Check again to make sure it didn't become free while + * we weren't looking. */ + if ((lock->slock & ((1U << TICKET_SHIFT) - 1)) == token) { + /* If we interrupted another spinlock while it was + * blocking, make sure it doesn't block (again) + * without rechecking the lock. */ + if (spinning.prev) + xen_set_irq_pending(irq); + rc = 1; + break; + } + + /* block until irq becomes pending */ + xen_poll_irq(irq); + } while (!xen_test_irq_pending(irq)); + + /* Leave the irq pending so that any interrupted blocker will + * re-check. */ + if (!rc) + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + + /* announce we're done */ + percpu_write(spinning, spinning.prev); + rm_lock = &__get_cpu_var(spinning_rm_lock); + raw_local_irq_save(flags); + __raw_write_lock(rm_lock); + __raw_write_unlock(rm_lock); + raw_local_irq_restore(flags); + + return rc; +} + +unsigned int xen_spin_adjust(raw_spinlock_t *lock, unsigned int token) +{ + return token;//todo +} + +int xen_spin_wait_flags(raw_spinlock_t *lock, unsigned int *token, + unsigned int flags) +{ + return xen_spin_wait(lock, *token);//todo +} + +void xen_spin_kick(raw_spinlock_t *lock, unsigned int token) +{ + unsigned int cpu; + + token &= (1U << TICKET_SHIFT) - 1; + for_each_online_cpu(cpu) { + raw_rwlock_t *rm_lock; + unsigned long flags; + struct spinning *spinning; + + if (cpu == raw_smp_processor_id()) + continue; + + rm_lock = &per_cpu(spinning_rm_lock, cpu); + raw_local_irq_save(flags); + __raw_read_lock(rm_lock); + + spinning = per_cpu(spinning, cpu); + smp_rmb(); + if (spinning + && (spinning->lock != lock || spinning->ticket != token)) + spinning = NULL; + + __raw_read_unlock(rm_lock); + raw_local_irq_restore(flags); + + if (unlikely(spinning)) { + notify_remote_via_ipi(SPIN_UNLOCK_VECTOR, cpu); + return; + } + } +} +EXPORT_SYMBOL(xen_spin_kick); + +#endif /* TICKET_SHIFT */ --- linux-ec2-2.6.31.orig/drivers/xen/core/xen_sysfs.c +++ linux-ec2-2.6.31/drivers/xen/core/xen_sysfs.c @@ -0,0 +1,421 @@ +/* + * copyright (c) 2006 IBM Corporation + * Authored by: Mike D. Day + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../xenbus/xenbus_comms.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mike D. 
Day "); + +static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + return sprintf(buffer, "xen\n"); +} + +HYPERVISOR_ATTR_RO(type); + +static int __init xen_sysfs_type_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &type_attr.attr); +} + +static void xen_sysfs_type_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &type_attr.attr); +} + +/* xen version attributes */ +static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version >> 16); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(major); + +static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version & 0xff); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(minor); + +static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *extra; + + extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL); + if (extra) { + ret = HYPERVISOR_xen_version(XENVER_extraversion, extra); + if (!ret) + ret = sprintf(buffer, "%s\n", extra); + kfree(extra); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(extra); + +static struct attribute *version_attrs[] = { + &major_attr.attr, + &minor_attr.attr, + &extra_attr.attr, + NULL +}; + +static struct attribute_group version_group = { + .name = "version", + .attrs = version_attrs, +}; + +static int __init xen_sysfs_version_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &version_group); +} + +static void xen_sysfs_version_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &version_group); +} + +/* UUID */ + +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + char *vm, *val; + int ret; + + if (!is_xenstored_ready()) + return -EBUSY; + + vm = xenbus_read(XBT_NIL, "vm", "", NULL); + if (IS_ERR(vm)) + return PTR_ERR(vm); + val = xenbus_read(XBT_NIL, vm, "uuid", NULL); + kfree(vm); + if (IS_ERR(val)) + return PTR_ERR(val); + ret = sprintf(buffer, "%s\n", val); + kfree(val); + return ret; +} + +HYPERVISOR_ATTR_RO(uuid); + +static int __init xen_sysfs_uuid_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr); +} + +static void xen_sysfs_uuid_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); +} + +/* xen compilation attributes */ + +static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compiler); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiler); + +static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compile_by); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiled_by); + +static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", 
info->compile_date);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compile_date);
+
+static struct attribute *xen_compile_attrs[] = {
+	&compiler_attr.attr,
+	&compiled_by_attr.attr,
+	&compile_date_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_compilation_group = {
+	.name = "compilation",
+	.attrs = xen_compile_attrs,
+};
+
+static int __init xen_compilation_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+static void xen_compilation_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+/* xen properties info */
+
+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *caps;
+
+	caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
+	if (caps) {
+		ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", caps);
+		kfree(caps);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(capabilities);
+
+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *cset;
+
+	cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
+	if (cset) {
+		ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", cset);
+		kfree(cset);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(changeset);
+
+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_platform_parameters *parms;
+
+	parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
+	if (parms) {
+		ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
+					     parms);
+		if (!ret)
+			ret = sprintf(buffer, "%lx\n", parms->virt_start);
+		kfree(parms);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(virtual_start);
+
+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret;
+
+	ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
+	if (ret > 0)
+		ret = sprintf(buffer, "%x\n", ret);
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(pagesize);
+
+/* eventually there will be several more features to export */
+static ssize_t xen_feature_show(int index, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_feature_info *info;
+
+	info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
+	if (info) {
+		info->submap_idx = index;
+		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
+		if (!ret)
+			ret = sprintf(buffer, "%d\n", info->submap);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return xen_feature_show(XENFEAT_writable_page_tables, buffer);
+}
+
+HYPERVISOR_ATTR_RO(writable_pt);
+
+static struct attribute *xen_properties_attrs[] = {
+	&capabilities_attr.attr,
+	&changeset_attr.attr,
+	&virtual_start_attr.attr,
+	&pagesize_attr.attr,
+	&writable_pt_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_properties_group = {
+	.name = "properties",
+	.attrs = xen_properties_attrs,
+};
+
+static int __init xen_properties_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
+}
+
+static void xen_properties_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
+}
+
+#ifdef CONFIG_KEXEC
+
+extern size_t vmcoreinfo_size_xen;
+extern unsigned long paddr_vmcoreinfo_xen;
+
+static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
+{
+	return sprintf(page, "%lx %zx\n",
+		       paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
+}
+
+HYPERVISOR_ATTR_RO(vmcoreinfo);
+
+static int __init
xen_sysfs_vmcoreinfo_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &vmcoreinfo_attr.attr); +} + +static void xen_sysfs_vmcoreinfo_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr); +} + +#endif + +static int __init hyper_sysfs_init(void) +{ + int ret; + + if (!is_running_on_xen()) + return -ENODEV; + + ret = xen_sysfs_type_init(); + if (ret) + goto out; + ret = xen_sysfs_version_init(); + if (ret) + goto version_out; + ret = xen_compilation_init(); + if (ret) + goto comp_out; + ret = xen_sysfs_uuid_init(); + if (ret) + goto uuid_out; + ret = xen_properties_init(); + if (ret) + goto prop_out; +#ifdef CONFIG_KEXEC + if (vmcoreinfo_size_xen != 0) { + ret = xen_sysfs_vmcoreinfo_init(); + if (ret) + goto vmcoreinfo_out; + } +#endif + + goto out; + +#ifdef CONFIG_KEXEC +vmcoreinfo_out: +#endif + xen_properties_destroy(); +prop_out: + xen_sysfs_uuid_destroy(); +uuid_out: + xen_compilation_destroy(); +comp_out: + xen_sysfs_version_destroy(); +version_out: + xen_sysfs_type_destroy(); +out: + return ret; +} + +static void __exit hyper_sysfs_exit(void) +{ +#ifdef CONFIG_KEXEC + if (vmcoreinfo_size_xen != 0) + xen_sysfs_vmcoreinfo_destroy(); +#endif + xen_properties_destroy(); + xen_compilation_destroy(); + xen_sysfs_uuid_destroy(); + xen_sysfs_version_destroy(); + xen_sysfs_type_destroy(); + +} + +module_init(hyper_sysfs_init); +module_exit(hyper_sysfs_exit); --- linux-ec2-2.6.31.orig/drivers/xen/core/xen_proc.c +++ linux-ec2-2.6.31/drivers/xen/core/xen_proc.c @@ -0,0 +1,23 @@ + +#include +#include +#include + +static struct proc_dir_entry *xen_base; + +struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode) +{ + if ( xen_base == NULL ) + if ( (xen_base = proc_mkdir("xen", NULL)) == NULL ) + panic("Couldn't create /proc/xen"); + return create_proc_entry(name, mode, xen_base); +} + +EXPORT_SYMBOL_GPL(create_xen_proc_entry); + +void remove_xen_proc_entry(const char *name) +{ + remove_proc_entry(name, xen_base); +} + +EXPORT_SYMBOL_GPL(remove_xen_proc_entry); --- linux-ec2-2.6.31.orig/drivers/xen/core/reboot.c +++ linux-ec2-2.6.31/drivers/xen/core/reboot.c @@ -0,0 +1,349 @@ +#define __KERNEL_SYSCALLS__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#undef handle_sysrq +#endif + +MODULE_LICENSE("Dual BSD/GPL"); + +#define SHUTDOWN_INVALID -1 +#define SHUTDOWN_POWEROFF 0 +#define SHUTDOWN_SUSPEND 2 +#define SHUTDOWN_RESUMING 3 +#define SHUTDOWN_HALT 4 + +/* Ignore multiple shutdown requests. */ +static int shutting_down = SHUTDOWN_INVALID; + +/* Can we leave APs online when we suspend? 
*/ +static int fast_suspend; + +static void __shutdown_handler(struct work_struct *unused); +static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler); + +int __xen_suspend(int fast_suspend, void (*resume_notifier)(int)); + +static int shutdown_process(void *__unused) +{ + static char *envp[] = { "HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; + static char *poweroff_argv[] = { "/sbin/poweroff", NULL }; + + extern asmlinkage long sys_reboot(int magic1, int magic2, + unsigned int cmd, void *arg); + + if ((shutting_down == SHUTDOWN_POWEROFF) || + (shutting_down == SHUTDOWN_HALT)) { + if (call_usermodehelper("/sbin/poweroff", poweroff_argv, + envp, 0) < 0) { +#ifdef CONFIG_XEN + sys_reboot(LINUX_REBOOT_MAGIC1, + LINUX_REBOOT_MAGIC2, + LINUX_REBOOT_CMD_POWER_OFF, + NULL); +#endif /* CONFIG_XEN */ + } + } + + shutting_down = SHUTDOWN_INVALID; /* could try again */ + + return 0; +} + +#ifdef CONFIG_PM_SLEEP + +static int setup_suspend_evtchn(void); + +/* Was last suspend request cancelled? */ +static int suspend_cancelled; + +static void xen_resume_notifier(int _suspend_cancelled) +{ + int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING); + BUG_ON(old_state != SHUTDOWN_SUSPEND); + suspend_cancelled = _suspend_cancelled; +} + +static int xen_suspend(void *__unused) +{ + int err, old_state; + + daemonize("suspend"); + err = set_cpus_allowed(current, cpumask_of_cpu(0)); + if (err) { + printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err); + goto fail; + } + + do { + err = __xen_suspend(fast_suspend, xen_resume_notifier); + if (err) { + printk(KERN_ERR "Xen suspend failed (%d)\n", err); + goto fail; + } + if (!suspend_cancelled) + setup_suspend_evtchn(); + old_state = cmpxchg( + &shutting_down, SHUTDOWN_RESUMING, SHUTDOWN_INVALID); + } while (old_state == SHUTDOWN_SUSPEND); + + switch (old_state) { + case SHUTDOWN_INVALID: + case SHUTDOWN_SUSPEND: + BUG(); + case SHUTDOWN_RESUMING: + break; + default: + schedule_delayed_work(&shutdown_work, 0); + break; + } + + return 0; + + fail: + old_state = xchg(&shutting_down, SHUTDOWN_INVALID); + BUG_ON(old_state != SHUTDOWN_SUSPEND); + return 0; +} + +#else +# define xen_suspend NULL +#endif + +static void switch_shutdown_state(int new_state) +{ + int prev_state, old_state = SHUTDOWN_INVALID; + + /* We only drive shutdown_state into an active state. */ + if (new_state == SHUTDOWN_INVALID) + return; + + do { + /* We drop this transition if already in an active state. */ + if ((old_state != SHUTDOWN_INVALID) && + (old_state != SHUTDOWN_RESUMING)) + return; + /* Attempt to transition. */ + prev_state = old_state; + old_state = cmpxchg(&shutting_down, old_state, new_state); + } while (old_state != prev_state); + + /* Either we kick off the work, or we leave it to xen_suspend(). */ + if (old_state == SHUTDOWN_INVALID) + schedule_delayed_work(&shutdown_work, 0); + else + BUG_ON(old_state != SHUTDOWN_RESUMING); +} + +static void __shutdown_handler(struct work_struct *unused) +{ + int err; + + err = kernel_thread((shutting_down == SHUTDOWN_SUSPEND) ? 
+ xen_suspend : shutdown_process, + NULL, CLONE_FS | CLONE_FILES); + + if (err < 0) { + printk(KERN_WARNING "Error creating shutdown process (%d): " + "retrying...\n", -err); + schedule_delayed_work(&shutdown_work, HZ/2); + } +} + +static void shutdown_handler(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + extern void ctrl_alt_del(void); + char *str; + struct xenbus_transaction xbt; + int err, new_state = SHUTDOWN_INVALID; + + if ((shutting_down != SHUTDOWN_INVALID) && + (shutting_down != SHUTDOWN_RESUMING)) + return; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); + /* Ignore read errors and empty reads. */ + if (XENBUS_IS_ERR_READ(str)) { + xenbus_transaction_end(xbt, 1); + return; + } + + xenbus_write(xbt, "control", "shutdown", ""); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) { + kfree(str); + goto again; + } + + if (strcmp(str, "poweroff") == 0) + new_state = SHUTDOWN_POWEROFF; + else if (strcmp(str, "reboot") == 0) + ctrl_alt_del(); +#ifdef CONFIG_PM_SLEEP + else if (strcmp(str, "suspend") == 0) + new_state = SHUTDOWN_SUSPEND; +#endif + else if (strcmp(str, "halt") == 0) + new_state = SHUTDOWN_HALT; + else + printk("Ignoring shutdown request: %s\n", str); + + switch_shutdown_state(new_state); + + kfree(str); +} + +static void sysrq_handler(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + char sysrq_key = '\0'; + struct xenbus_transaction xbt; + int err; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { + printk(KERN_ERR "Unable to read sysrq code in " + "control/sysrq\n"); + xenbus_transaction_end(xbt, 1); + return; + } + + if (sysrq_key != '\0') + xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + +#ifdef CONFIG_MAGIC_SYSRQ + if (sysrq_key != '\0') + handle_sysrq(sysrq_key, NULL); +#endif +} + +static struct xenbus_watch shutdown_watch = { + .node = "control/shutdown", + .callback = shutdown_handler +}; + +static struct xenbus_watch sysrq_watch = { + .node = "control/sysrq", + .callback = sysrq_handler +}; + +#ifdef CONFIG_PM_SLEEP +static irqreturn_t suspend_int(int irq, void* dev_id) +{ + switch_shutdown_state(SHUTDOWN_SUSPEND); + return IRQ_HANDLED; +} + +static int setup_suspend_evtchn(void) +{ + static int irq; + int port; + char portstr[16]; + + if (irq > 0) + unbind_from_irqhandler(irq, NULL); + + irq = bind_listening_port_to_irqhandler(0, suspend_int, 0, "suspend", + NULL); + if (irq <= 0) + return -1; + + port = irq_to_evtchn_port(irq); + printk(KERN_INFO "suspend: event channel %d\n", port); + sprintf(portstr, "%d", port); + xenbus_write(XBT_NIL, "device/suspend", "event-channel", portstr); + + return 0; +} +#else +#define setup_suspend_evtchn() 0 +#endif + +static int setup_shutdown_watcher(void) +{ + int err; + + xenbus_scanf(XBT_NIL, "control", + "platform-feature-multiprocessor-suspend", + "%d", &fast_suspend); + + err = register_xenbus_watch(&shutdown_watch); + if (err) { + printk(KERN_ERR "Failed to set shutdown watcher\n"); + return err; + } + + err = register_xenbus_watch(&sysrq_watch); + if (err) { + printk(KERN_ERR "Failed to set sysrq watcher\n"); + return err; + } + + /* suspend event channel */ + err = setup_suspend_evtchn(); + if (err) { + printk(KERN_ERR "Failed to register suspend event channel\n"); + return err; + } + + return 0; +} + 
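(For reference, not part of the patch: the shutdown_watch registered above fires when the toolstack writes the guest's control/shutdown xenstore node. A minimal dom0-side sketch, assuming the classic libxenstore userspace API — xs_daemon_open(), xs_write(), xs_daemon_close() — and a hypothetical helper name, might look like:)

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <xs.h>

/* Illustrative only: ask domain 'domid' to shut down by writing the
 * node that the in-guest shutdown_handler() is watching.  'how' is one
 * of the strings the handler parses: "poweroff", "reboot", "suspend",
 * or "halt". */
int request_shutdown(int domid, const char *how)
{
	struct xs_handle *xs = xs_daemon_open();
	char path[64];
	bool ok;

	if (!xs)
		return -1;
	snprintf(path, sizeof(path),
		 "/local/domain/%d/control/shutdown", domid);
	/* The guest's watch callback fires on this write and then
	 * clears the node back to "". */
	ok = xs_write(xs, XBT_NULL, path, how, strlen(how));
	xs_daemon_close(xs);
	return ok ? 0 : -1;
}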
+#ifdef CONFIG_XEN + +static int shutdown_event(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + setup_shutdown_watcher(); + return NOTIFY_DONE; +} + +static int __init setup_shutdown_event(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = shutdown_event + }; + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(setup_shutdown_event); + +#else /* !defined(CONFIG_XEN) */ + +int xen_reboot_init(void) +{ + return setup_shutdown_watcher(); +} + +#endif /* !defined(CONFIG_XEN) */ --- linux-ec2-2.6.31.orig/drivers/xen/core/machine_reboot.c +++ linux-ec2-2.6.31/drivers/xen/core/machine_reboot.c @@ -0,0 +1,283 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../base/base.h" + +#if defined(__i386__) || defined(__x86_64__) +#include +/* TBD: Dom0 should propagate the determined value to Xen. */ +bool port_cf9_safe = false; + +/* + * Power off function, if any + */ +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +void machine_emergency_restart(void) +{ + /* We really want to get pending console data out before we die. */ + xencons_force_flush(); + HYPERVISOR_shutdown(SHUTDOWN_reboot); +} + +void machine_restart(char * __unused) +{ + machine_emergency_restart(); +} + +void machine_halt(void) +{ + machine_power_off(); +} + +void machine_power_off(void) +{ + /* We really want to get pending console data out before we die. */ + xencons_force_flush(); + if (pm_power_off) + pm_power_off(); + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +} + +int reboot_thru_bios = 0; /* for dmi_scan.c */ +EXPORT_SYMBOL(machine_restart); +EXPORT_SYMBOL(machine_halt); +EXPORT_SYMBOL(machine_power_off); + +#ifdef CONFIG_PM_SLEEP +static void pre_suspend(void) +{ + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; + WARN_ON(HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO), + __pte_ma(0), 0)); + + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + mfn_to_pfn(xen_start_info->console.domU.mfn); +} + +static void post_suspend(int suspend_cancelled) +{ + int i, j, k, fpp; + unsigned long shinfo_mfn; + extern unsigned long max_pfn; + extern unsigned long *pfn_to_mfn_frame_list_list; + extern unsigned long **pfn_to_mfn_frame_list; + + if (suspend_cancelled) { + xen_start_info->store_mfn = + pfn_to_mfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { +#ifdef CONFIG_SMP + cpumask_copy(vcpu_initialized_mask, cpu_online_mask); +#endif + } + + shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT; + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO), + pfn_pte_ma(shinfo_mfn, PAGE_KERNEL), + 0)) + BUG(); + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + + memset(empty_zero_page, 0, PAGE_SIZE); + + fpp = PAGE_SIZE/sizeof(unsigned long); + for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { + if ((j % fpp) == 0) { + k++; + pfn_to_mfn_frame_list_list[k] = + virt_to_mfn(pfn_to_mfn_frame_list[k]); + j = 0; + } + pfn_to_mfn_frame_list[k][j] = + virt_to_mfn(&phys_to_machine_mapping[i]); + } + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + virt_to_mfn(pfn_to_mfn_frame_list_list); +} +#endif + +#else /* !(defined(__i386__) || defined(__x86_64__)) */ 
+ +#ifndef HAVE_XEN_PRE_SUSPEND +#define xen_pre_suspend() ((void)0) +#endif + +#ifndef HAVE_XEN_POST_SUSPEND +#define xen_post_suspend(x) ((void)0) +#endif + +#define switch_idle_mm() ((void)0) +#define mm_pin_all() ((void)0) +#define pre_suspend() xen_pre_suspend() +#define post_suspend(x) xen_post_suspend(x) + +#endif + +#ifdef CONFIG_PM_SLEEP +struct suspend { + int fast_suspend; + void (*resume_notifier)(int); +}; + +static int take_machine_down(void *_suspend) +{ + struct suspend *suspend = _suspend; + int suspend_cancelled, err; + + if (suspend->fast_suspend) { + BUG_ON(!irqs_disabled()); + } else { + BUG_ON(irqs_disabled()); + + for (;;) { + err = smp_suspend(); + if (err) + return err; + + xenbus_suspend(); + preempt_disable(); + + if (num_online_cpus() == 1) + break; + + preempt_enable(); + xenbus_suspend_cancel(); + } + + local_irq_disable(); + } + + mm_pin_all(); + suspend_cancelled = sysdev_suspend(PMSG_SUSPEND); + if (!suspend_cancelled) { + pre_suspend(); + + /* + * This hypercall returns 1 if suspend was cancelled or the domain was + * merely checkpointed, and 0 if it is resuming in a new domain. + */ + suspend_cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + if (!suspend_cancelled) { + unsigned int cpu; + + for_each_possible_cpu(cpu) { + if (suspend->fast_suspend + && cpu != smp_processor_id() + && HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); + + setup_vcpu_info(cpu); + + if (suspend->fast_suspend + && cpu != smp_processor_id() + && HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); + } + } + } else + BUG_ON(suspend_cancelled > 0); + suspend->resume_notifier(suspend_cancelled); + if (suspend_cancelled >= 0) { + post_suspend(suspend_cancelled); + sysdev_resume(); + } + if (!suspend_cancelled) { +#ifdef __x86_64__ + /* + * Older versions of Xen do not save/restore the user %cr3. + * We do it here just in case, but there's no need if we are + * in fast-suspend mode as that implies a new enough Xen. + */ + if (!suspend->fast_suspend) + xen_new_user_pt(current->active_mm->pgd); +#endif + } + + if (!suspend->fast_suspend) + local_irq_enable(); + + return suspend_cancelled; +} + +int __xen_suspend(int fast_suspend, void (*resume_notifier)(int)) +{ + int err, suspend_cancelled; + struct suspend suspend; + + BUG_ON(smp_processor_id() != 0); + BUG_ON(in_interrupt()); + +#if defined(__i386__) || defined(__x86_64__) + if (xen_feature(XENFEAT_auto_translated_physmap)) { + printk(KERN_WARNING "Cannot suspend in " + "auto_translated_physmap mode.\n"); + return -EOPNOTSUPP; + } +#endif + + err = dpm_suspend_noirq(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "dpm_suspend_noirq() failed: %d\n", err); + return err; + } + + /* If we are definitely UP then 'slow mode' is actually faster. 
*/ + if (num_possible_cpus() == 1) + fast_suspend = 0; + + suspend.fast_suspend = fast_suspend; + suspend.resume_notifier = resume_notifier; + + if (fast_suspend) { + xenbus_suspend(); + err = stop_machine(take_machine_down, &suspend, + &cpumask_of_cpu(0)); + if (err < 0) + xenbus_suspend_cancel(); + } else { + err = take_machine_down(&suspend); + } + + if (err < 0) { + dpm_resume_noirq(PMSG_RESUME); + return err; + } + + suspend_cancelled = err; + if (!suspend_cancelled) { + xencons_resume(); + xenbus_resume(); + } else { + xenbus_suspend_cancel(); + } + + if (!fast_suspend) + smp_resume(); + + dpm_resume_noirq(PMSG_RESUME); + + return 0; +} +#endif --- linux-ec2-2.6.31.orig/drivers/xen/core/xencomm.c +++ linux-ec2-2.6.31/drivers/xen/core/xencomm.c @@ -0,0 +1,229 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (C) IBM Corp. 2006 + * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include +#ifdef __ia64__ +#include /* for is_kern_addr() */ +#endif + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +static int xencomm_init(struct xencomm_desc *desc, + void *buffer, unsigned long bytes) +{ + unsigned long recorded = 0; + int i = 0; + + while ((recorded < bytes) && (i < desc->nr_addrs)) { + unsigned long vaddr = (unsigned long)buffer + recorded; + unsigned long paddr; + int offset; + int chunksz; + + offset = vaddr % PAGE_SIZE; /* handle partial pages */ + chunksz = min(PAGE_SIZE - offset, bytes - recorded); + + paddr = xencomm_vtop(vaddr); + if (paddr == ~0UL) { + printk("%s: couldn't translate vaddr %lx\n", + __func__, vaddr); + return -EINVAL; + } + + desc->address[i++] = paddr; + recorded += chunksz; + } + + if (recorded < bytes) { + printk("%s: could only translate %ld of %ld bytes\n", + __func__, recorded, bytes); + return -ENOSPC; + } + + /* mark remaining addresses invalid (just for safety) */ + while (i < desc->nr_addrs) + desc->address[i++] = XENCOMM_INVALID; + + desc->magic = XENCOMM_MAGIC; + + return 0; +} + +static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask, + void *buffer, unsigned long bytes) +{ + struct xencomm_desc *desc; + unsigned long buffer_ulong = (unsigned long)buffer; + unsigned long start = buffer_ulong & PAGE_MASK; + unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK; + unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT; + unsigned long size = sizeof(*desc) + + sizeof(desc->address[0]) * nr_addrs; + + /* + * slab allocator returns at least sizeof(void*) aligned pointer. + * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might + * cross page boundary. 
+	 */
+	if (sizeof(*desc) > sizeof(void*)) {
+		unsigned long order = get_order(size);
+		desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
+							       order);
+		if (desc == NULL)
+			return NULL;
+
+		desc->nr_addrs =
+			((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
+			sizeof(*desc->address);
+	} else {
+		desc = kmalloc(size, gfp_mask);
+		if (desc == NULL)
+			return NULL;
+
+		desc->nr_addrs = nr_addrs;
+	}
+	return desc;
+}
+
+void xencomm_free(struct xencomm_handle *desc)
+{
+	if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
+		struct xencomm_desc *desc__ = (struct xencomm_desc*)desc;
+		if (sizeof(*desc__) > sizeof(void*)) {
+			unsigned long size = sizeof(*desc__) +
+				sizeof(desc__->address[0]) * desc__->nr_addrs;
+			unsigned long order = get_order(size);
+			free_pages((unsigned long)__va(desc), order);
+		} else
+			kfree(__va(desc));
+	}
+}
+
+static int xencomm_create(void *buffer, unsigned long bytes, struct xencomm_desc **ret, gfp_t gfp_mask)
+{
+	struct xencomm_desc *desc;
+	int rc;
+
+	pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
+
+	if (bytes == 0) {
+		/* don't create a descriptor; Xen recognizes NULL. */
+		BUG_ON(buffer != NULL);
+		*ret = NULL;
+		return 0;
+	}
+
+	BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
+
+	desc = xencomm_alloc(gfp_mask, buffer, bytes);
+	if (!desc) {
+		printk("%s failure\n", "xencomm_alloc");
+		return -ENOMEM;
+	}
+
+	rc = xencomm_init(desc, buffer, bytes);
+	if (rc) {
+		printk("%s failure: %d\n", "xencomm_init", rc);
+		xencomm_free((struct xencomm_handle *)__pa(desc));
+		return rc;
+	}
+
+	*ret = desc;
+	return 0;
+}
+
+/* check that the address lies outside the VMALLOC region,
+ * i.e. that the buffer is physically contiguous */
+static int is_phys_contiguous(unsigned long addr)
+{
+	if (!is_kernel_addr(addr))
+		return 0;
+
+	return (addr < VMALLOC_START) || (addr >= VMALLOC_END);
+}
+
+static struct xencomm_handle *xencomm_create_inline(void *ptr)
+{
+	unsigned long paddr;
+
+	BUG_ON(!is_phys_contiguous((unsigned long)ptr));
+
+	paddr = (unsigned long)xencomm_pa(ptr);
+	BUG_ON(paddr & XENCOMM_INLINE_FLAG);
+	return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
+}
+
+/* "mini" routine, for stack-based communications: */
+static int xencomm_create_mini(void *buffer,
+	unsigned long bytes, struct xencomm_mini *xc_desc,
+	struct xencomm_desc **ret)
+{
+	int rc = 0;
+	struct xencomm_desc *desc;
+	BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
+
+	desc = (void *)xc_desc;
+
+	desc->nr_addrs = XENCOMM_MINI_ADDRS;
+
+	if (!(rc = xencomm_init(desc, buffer, bytes)))
+		*ret = desc;
+
+	return rc;
+}
+
+struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
+{
+	int rc;
+	struct xencomm_desc *desc;
+
+	if (is_phys_contiguous((unsigned long)ptr))
+		return xencomm_create_inline(ptr);
+
+	rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
+
+	if (rc || desc == NULL)
+		return NULL;
+
+	return xencomm_pa(desc);
+}
+
+struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
+			struct xencomm_mini *xc_desc)
+{
+	int rc;
+	struct xencomm_desc *desc = NULL;
+
+	if (is_phys_contiguous((unsigned long)ptr))
+		return xencomm_create_inline(ptr);
+
+	rc = xencomm_create_mini(ptr, bytes, xc_desc,
+				&desc);
+
+	if (rc)
+		return NULL;
+
+	return xencomm_pa(desc);
+}
--- linux-ec2-2.6.31.orig/drivers/xen/core/gnttab.c
+++ linux-ec2-2.6.31/drivers/xen/core/gnttab.c
@@ -0,0 +1,884 @@
+/******************************************************************************
+ * gnttab.c
+ *
+ * Granting foreign access to our memory reservation.
+ * + * Copyright (c) 2005-2006, Christopher Clark + * Copyright (c) 2004-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +/* External tools reserve first few grant table entries. */ +#define NR_RESERVED_ENTRIES 8 +#define GNTTAB_LIST_END 0xffffffff +#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) + +static grant_ref_t **gnttab_list; +static unsigned int nr_grant_frames; +static unsigned int boot_max_nr_grant_frames; +static int gnttab_free_count; +static grant_ref_t gnttab_free_head; +static DEFINE_SPINLOCK(gnttab_list_lock); + +static struct grant_entry *shared; + +static struct gnttab_free_callback *gnttab_free_callback_list; + +static int gnttab_expand(unsigned int req_entries); + +#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) + +#define nr_freelist_frames(grant_frames) \ + (((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP) + +static int get_free_entries(int count) +{ + unsigned long flags; + int ref, rc; + grant_ref_t head; + + spin_lock_irqsave(&gnttab_list_lock, flags); + + if ((gnttab_free_count < count) && + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return rc; + } + + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = gnttab_entry(head); + gnttab_free_head = gnttab_entry(head); + gnttab_entry(head) = GNTTAB_LIST_END; + + spin_unlock_irqrestore(&gnttab_list_lock, flags); + + return ref; +} + +#define get_free_entry() get_free_entries(1) + +static void do_free_callbacks(void) +{ + struct gnttab_free_callback *callback, *next; + + callback = gnttab_free_callback_list; + gnttab_free_callback_list = NULL; + + while (callback != NULL) { + next = callback->next; + if (gnttab_free_count >= callback->count) { + callback->next = NULL; + callback->queued = 0; + callback->fn(callback->arg); + } else { + callback->next = 
gnttab_free_callback_list; + gnttab_free_callback_list = callback; + } + callback = next; + } +} + +static inline void check_free_callbacks(void) +{ + if (unlikely(gnttab_free_callback_list)) + do_free_callbacks(); +} + +static void put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = ref; + gnttab_free_count++; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} + +/* + * Public grant-issuing interface functions + */ + +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int flags) +{ + int ref; + + if (unlikely((ref = get_free_entry()) < 0)) + return -ENOSPC; + + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing)); + shared[ref].flags = GTF_permit_access | flags; + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); + +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags) +{ + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing)); + shared[ref].flags = GTF_permit_access | flags; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); + + +int gnttab_query_foreign_access(grant_ref_t ref) +{ + u16 nflags; + + nflags = shared[ref].flags; + + return (nflags & (GTF_reading|GTF_writing)); +} +EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); + +int gnttab_end_foreign_access_ref(grant_ref_t ref) +{ + u16 flags, nflags; + + nflags = shared[ref].flags; + do { + if ((flags = nflags) & (GTF_reading|GTF_writing)) { + printk(KERN_DEBUG "WARNING: g.e. still in use!\n"); + return 0; + } + } while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) != + flags); + + return 1; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); + +void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page) +{ + if (gnttab_end_foreign_access_ref(ref)) { + put_free_entry(ref); + if (page != 0) + free_page(page); + } else { + /* XXX This needs to be fixed so that the ref and page are + placed on a list to be freed up later. */ + printk(KERN_DEBUG + "WARNING: leaking g.e. and page still in use!\n"); + } +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); + +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) +{ + int ref; + + if (unlikely((ref = get_free_entry()) < 0)) + return -ENOSPC; + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) +{ + shared[ref].frame = pfn; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_accept_transfer; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) +{ + unsigned long frame; + u16 flags; + + /* + * If a transfer is not even yet started, try to reclaim the grant + * reference and return failure (== 0). + */ + while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { + if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags) + return 0; + cpu_relax(); + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = shared[ref].flags; + cpu_relax(); + } + + /* Read the frame number /after/ reading completion status. 
*/ + rmb(); + frame = shared[ref].frame; + BUG_ON(frame == 0); + + return frame; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); + +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) +{ + unsigned long frame = gnttab_end_foreign_transfer_ref(ref); + put_free_entry(ref); + return frame; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); + +void gnttab_free_grant_reference(grant_ref_t ref) +{ + put_free_entry(ref); +} +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); + +void gnttab_free_grant_references(grant_ref_t head) +{ + grant_ref_t ref; + unsigned long flags; + int count = 1; + if (head == GNTTAB_LIST_END) + return; + spin_lock_irqsave(&gnttab_list_lock, flags); + ref = head; + while (gnttab_entry(ref) != GNTTAB_LIST_END) { + ref = gnttab_entry(ref); + count++; + } + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = head; + gnttab_free_count += count; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_free_grant_references); + +int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) +{ + int h = get_free_entries(count); + + if (h < 0) + return -ENOSPC; + + *head = h; + + return 0; +} +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); + +int gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + return (*private_head == GNTTAB_LIST_END); +} +EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); + +int gnttab_claim_grant_reference(grant_ref_t *private_head) +{ + grant_ref_t g = *private_head; + if (unlikely(g == GNTTAB_LIST_END)) + return -ENOSPC; + *private_head = gnttab_entry(g); + return g; +} +EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); + +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release) +{ + gnttab_entry(release) = *private_head; + *private_head = release; +} +EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); + +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + if (callback->queued) + goto out; + callback->fn = fn; + callback->arg = arg; + callback->count = count; + callback->queued = 1; + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + check_free_callbacks(); +out: + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_request_free_callback); + +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) +{ + struct gnttab_free_callback **pcb; + unsigned long flags; + + spin_lock_irqsave(&gnttab_list_lock, flags); + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { + if (*pcb == callback) { + *pcb = callback->next; + callback->queued = 0; + break; + } + } + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); + +static int grow_gnttab_list(unsigned int more_frames) +{ + unsigned int new_nr_grant_frames, extra_entries, i; + unsigned int nr_glist_frames, new_nr_glist_frames; + + new_nr_grant_frames = nr_grant_frames + more_frames; + extra_entries = more_frames * ENTRIES_PER_GRANT_FRAME; + + nr_glist_frames = nr_freelist_frames(nr_grant_frames); + new_nr_glist_frames = nr_freelist_frames(new_nr_grant_frames); + for (i = nr_glist_frames; i < new_nr_glist_frames; i++) { + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); + if (!gnttab_list[i]) + goto grow_nomem; + } + + for (i = ENTRIES_PER_GRANT_FRAME * nr_grant_frames; + i < ENTRIES_PER_GRANT_FRAME * 
new_nr_grant_frames - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(i) = gnttab_free_head; + gnttab_free_head = ENTRIES_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_count += extra_entries; + + nr_grant_frames = new_nr_grant_frames; + + check_free_callbacks(); + + return 0; + +grow_nomem: + for ( ; i >= nr_glist_frames; i--) + free_page((unsigned long) gnttab_list[i]); + return -ENOMEM; +} + +static unsigned int __max_nr_grant_frames(void) +{ + struct gnttab_query_size query; + int rc; + + query.dom = DOMID_SELF; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); + if ((rc < 0) || (query.status != GNTST_okay)) + return 4; /* Legacy max supported number of frames */ + + return query.max_nr_frames; +} + +static inline unsigned int max_nr_grant_frames(void) +{ + unsigned int xen_max = __max_nr_grant_frames(); + + if (xen_max > boot_max_nr_grant_frames) + return boot_max_nr_grant_frames; + return xen_max; +} + +#ifdef CONFIG_XEN + +static DEFINE_SEQLOCK(gnttab_dma_lock); + +#ifdef CONFIG_X86 +static int map_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} +#endif + +void *arch_gnttab_alloc_shared(unsigned long *frames) +{ + struct vm_struct *area; + area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); + BUG_ON(area == NULL); + return area->addr; +} +#endif /* CONFIG_X86 */ + +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) +{ + struct gnttab_setup_table setup; + unsigned long *frames; + unsigned int nr_gframes = end_idx + 1; + int rc; + + frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); + if (!frames) + return -ENOMEM; + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_gframes; + set_xen_guest_handle(setup.frame_list, frames); + + rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (rc == -ENOSYS) { + kfree(frames); + return -ENOSYS; + } + + BUG_ON(rc || setup.status); + + if (shared == NULL) + shared = arch_gnttab_alloc_shared(frames); + +#ifdef CONFIG_X86 + rc = apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_gframes, + map_pte_fn, &frames); + BUG_ON(rc); + frames -= nr_gframes; /* adjust after map_pte_fn() */ +#endif /* CONFIG_X86 */ + + kfree(frames); + + return 0; +} + +static void gnttab_page_free(struct page *page, unsigned int order) +{ + BUG_ON(order); + ClearPageForeign(page); + gnttab_reset_grant_page(page); + put_page(page); +} + +/* + * Must not be called with IRQs off. This should only be used on the + * slow path. + * + * Copy a foreign granted page to local memory. 
+ */ +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep) +{ + struct gnttab_unmap_and_replace unmap; + mmu_update_t mmu; + struct page *page; + struct page *new_page; + void *new_addr; + void *addr; + paddr_t pfn; + maddr_t mfn; + maddr_t new_mfn; + int err; + + page = *pagep; + if (!get_page_unless_zero(page)) + return -ENOENT; + + err = -ENOMEM; + new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (!new_page) + goto out; + + new_addr = page_address(new_page); + addr = page_address(page); + memcpy(new_addr, addr, PAGE_SIZE); + + pfn = page_to_pfn(page); + mfn = pfn_to_mfn(pfn); + new_mfn = virt_to_mfn(new_addr); + + write_seqlock(&gnttab_dma_lock); + + /* Make seq visible before checking page_mapped. */ + smp_mb(); + + /* Has the page been DMA-mapped? */ + if (unlikely(page_mapped(page))) { + write_sequnlock(&gnttab_dma_lock); + put_page(new_page); + err = -EBUSY; + goto out; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) + set_phys_to_machine(pfn, new_mfn); + + gnttab_set_replace_op(&unmap, (unsigned long)addr, + (unsigned long)new_addr, ref); + + err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, + &unmap, 1); + BUG_ON(err); + BUG_ON(unmap.status); + + write_sequnlock(&gnttab_dma_lock); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY); + + mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + mmu.val = pfn; + err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF); + BUG_ON(err); + } + + new_page->mapping = page->mapping; + new_page->index = page->index; + set_bit(PG_foreign, &new_page->flags); + *pagep = new_page; + + SetPageForeign(page, gnttab_page_free); + page->mapping = NULL; + +out: + put_page(page); + return err; +} +EXPORT_SYMBOL_GPL(gnttab_copy_grant_page); + +void gnttab_reset_grant_page(struct page *page) +{ + init_page_count(page); + reset_page_mapcount(page); +} +EXPORT_SYMBOL_GPL(gnttab_reset_grant_page); + +/* + * Keep track of foreign pages marked as PageForeign so that we don't + * return them to the remote domain prematurely. + * + * PageForeign pages are pinned down by increasing their mapcount. + * + * All other pages are simply returned as is. + */ +void __gnttab_dma_map_page(struct page *page) +{ + unsigned int seq; + + if (!is_running_on_xen() || !PageForeign(page)) + return; + + do { + seq = read_seqbegin(&gnttab_dma_lock); + + if (gnttab_dma_local_pfn(page)) + break; + + atomic_set(&page->_mapcount, 0); + + /* Make _mapcount visible before read_seqretry. 
*/ + smp_mb(); + } while (unlikely(read_seqretry(&gnttab_dma_lock, seq))); +} + +#ifdef __HAVE_ARCH_PTE_SPECIAL + +static unsigned int GNTMAP_pte_special; + +bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *map, + unsigned int count) +{ + unsigned int i; + + if (unlikely(cmd != GNTTABOP_map_grant_ref)) + count = 0; + + for (i = 0; i < count; ++i, ++map) { + if (!(map->flags & GNTMAP_host_map) + || !(map->flags & GNTMAP_application_map)) + continue; + if (GNTMAP_pte_special) + map->flags |= GNTMAP_pte_special; + else { + BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); + return true; + } + } + + return false; +} +EXPORT_SYMBOL(gnttab_pre_map_adjust); + +#if CONFIG_XEN_COMPAT < 0x030400 +int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *map, unsigned int count) +{ + unsigned int i; + int rc = 0; + + for (i = 0; i < count && rc == 0; ++i, ++map) { + pte_t pte; + + if (!(map->flags & GNTMAP_host_map) + || !(map->flags & GNTMAP_application_map)) + continue; + +#ifdef CONFIG_X86 + pte = __pte_ma((map->dev_bus_addr | _PAGE_PRESENT | _PAGE_USER + | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NX + | _PAGE_SPECIAL) + & __supported_pte_mask); +#else +#error Architecture not yet supported. +#endif + if (!(map->flags & GNTMAP_readonly)) + pte = pte_mkwrite(pte); + + if (map->flags & GNTMAP_contains_pte) { + mmu_update_t u; + + u.ptr = map->host_addr; + u.val = __pte_val(pte); + rc = HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); + } else + rc = HYPERVISOR_update_va_mapping(map->host_addr, pte, 0); + } + + return rc; +} +EXPORT_SYMBOL(gnttab_post_map_adjust); +#endif + +#endif /* __HAVE_ARCH_PTE_SPECIAL */ + +static int gnttab_resume(struct sys_device *dev) +{ + if (max_nr_grant_frames() < nr_grant_frames) + return -ENOSYS; + return gnttab_map(0, nr_grant_frames - 1); +} +#define gnttab_resume() gnttab_resume(NULL) + +#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_X86 +static int gnttab_suspend(struct sys_device *dev, pm_message_t state) +{ + apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_grant_frames, + unmap_pte_fn, NULL); + return 0; +} +#else +#define gnttab_suspend NULL +#endif + +static struct sysdev_class gnttab_sysclass = { + .name = "gnttab", + .resume = gnttab_resume, + .suspend = gnttab_suspend, +}; + +static struct sys_device device_gnttab = { + .id = 0, + .cls = &gnttab_sysclass, +}; +#endif + +#else /* !CONFIG_XEN */ + +#include + +static unsigned long resume_frames; + +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) +{ + struct xen_add_to_physmap xatp; + unsigned int i = end_idx; + + /* Loop backwards, so that the first hypercall has the largest index, + * ensuring that the table will grow only once. 
+	 */
+	do {
+		xatp.domid = DOMID_SELF;
+		xatp.idx = i;
+		xatp.space = XENMAPSPACE_grant_table;
+		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
+		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+			BUG();
+	} while (i-- > start_idx);
+
+	return 0;
+}
+
+int gnttab_resume(void)
+{
+	unsigned int max_nr_gframes, nr_gframes;
+
+	nr_gframes = nr_grant_frames;
+	max_nr_gframes = max_nr_grant_frames();
+	if (max_nr_gframes < nr_gframes)
+		return -ENOSYS;
+
+	if (!resume_frames) {
+		resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+		shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
+		if (shared == NULL) {
+			printk(KERN_WARNING
+			       "Failed to ioremap gnttab shared frames\n");
+			return -1;
+		}
+	}
+
+	gnttab_map(0, nr_gframes - 1);
+
+	return 0;
+}
+
+#endif /* !CONFIG_XEN */
+
+static int gnttab_expand(unsigned int req_entries)
+{
+	int rc;
+	unsigned int cur, extra;
+
+	cur = nr_grant_frames;
+	extra = ((req_entries + (ENTRIES_PER_GRANT_FRAME-1)) /
+		 ENTRIES_PER_GRANT_FRAME);
+	if (cur + extra > max_nr_grant_frames())
+		return -ENOSPC;
+
+	if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
+		rc = grow_gnttab_list(extra);
+
+	return rc;
+}
+
+int __devinit gnttab_init(void)
+{
+	int i;
+	unsigned int max_nr_glist_frames, nr_glist_frames;
+	unsigned int nr_init_grefs;
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+#if defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP)
+	if (!is_initial_xendomain()) {
+		int err = sysdev_class_register(&gnttab_sysclass);
+
+		if (!err)
+			err = sysdev_register(&device_gnttab);
+		if (err)
+			return err;
+	}
+#endif
+
+	nr_grant_frames = 1;
+	boot_max_nr_grant_frames = __max_nr_grant_frames();
+
+	/* Determine the maximum number of frames required for the
+	 * grant reference free list on the current hypervisor.
+	 */
+	max_nr_glist_frames = nr_freelist_frames(boot_max_nr_grant_frames);
+
+	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
+			      GFP_KERNEL);
+	if (gnttab_list == NULL)
+		return -ENOMEM;
+
+	nr_glist_frames = nr_freelist_frames(nr_grant_frames);
+	for (i = 0; i < nr_glist_frames; i++) {
+		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
+		if (gnttab_list[i] == NULL)
+			goto ini_nomem;
+	}
+
+	if (gnttab_resume() < 0)
+		return -ENODEV;
+
+	nr_init_grefs = nr_grant_frames * ENTRIES_PER_GRANT_FRAME;
+
+	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+		gnttab_entry(i) = i + 1;
+
+	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
+	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
+	gnttab_free_head = NR_RESERVED_ENTRIES;
+
+#if defined(CONFIG_XEN) && defined(__HAVE_ARCH_PTE_SPECIAL)
+	if (!xen_feature(XENFEAT_auto_translated_physmap)
+	    && xen_feature(XENFEAT_gnttab_map_avail_bits)) {
+#ifdef CONFIG_X86
+		GNTMAP_pte_special = (__pte_val(pte_mkspecial(__pte_ma(0)))
+				      >> _PAGE_BIT_UNUSED1) << _GNTMAP_guest_avail0;
+#else
+#error Architecture not yet supported.
+#endif
+	}
+#endif
+
+	return 0;
+
+ ini_nomem:
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)gnttab_list[i]);
+	kfree(gnttab_list);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_XEN
+core_initcall(gnttab_init);
+#endif
--- linux-ec2-2.6.31.orig/drivers/xen/core/Makefile
+++ linux-ec2-2.6.31/drivers/xen/core/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the linux kernel.
+# + +obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o firmware.o domctl.o + +obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_PROC_FS) += xen_proc.o +obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor_sysfs.o +obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o +obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o +obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o +obj-$(CONFIG_SMP) += spinlock.o +obj-$(CONFIG_KEXEC) += machine_kexec.o +obj-$(CONFIG_XEN_XENCOMM) += xencomm.o --- linux-ec2-2.6.31.orig/drivers/xen/core/hypervisor_sysfs.c +++ linux-ec2-2.6.31/drivers/xen/core/hypervisor_sysfs.c @@ -0,0 +1,57 @@ +/* + * copyright (c) 2006 IBM Corporation + * Authored by: Mike D. Day + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +static ssize_t hyp_sysfs_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->show) + return hyp_attr->show(hyp_attr, buffer); + return 0; +} + +static ssize_t hyp_sysfs_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t len) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->store) + return hyp_attr->store(hyp_attr, buffer, len); + return 0; +} + +static struct sysfs_ops hyp_sysfs_ops = { + .show = hyp_sysfs_show, + .store = hyp_sysfs_store, +}; + +static struct kobj_type hyp_sysfs_kobj_type = { + .sysfs_ops = &hyp_sysfs_ops, +}; + +static int __init hypervisor_subsys_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + hypervisor_kobj->ktype = &hyp_sysfs_kobj_type; + return 0; +} + +device_initcall(hypervisor_subsys_init); --- linux-ec2-2.6.31.orig/drivers/xen/core/firmware.c +++ linux-ec2-2.6.31/drivers/xen/core/firmware.c @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include